//----------------------------------------------------------------------------------
// File:        CDx12Api.cpp
// SDK Version: 1.0.2
//
// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: LicenseRef-NvidiaProprietary
//
// NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
// property and proprietary rights in and to this material, related
// documentation and any modifications thereto. Any use, reproduction,
// disclosure or distribution of this material and related documentation
// without an express license agreement from NVIDIA CORPORATION or
// its affiliates is strictly prohibited.
//
//----------------------------------------------------------------------------------

///////////////////////////////////////////
// CDx12Api.cpp
//
// Top level API into DX12 video devices
//

#include <initguid.h>
#include "CDx12Api.h"
#include "CDx12Sync.h"
#include "CDx12SwapChain.h"
#include "d3dx12.h"

#pragma comment( lib, "d3d12" ) 

// Row-pitch helpers for test surfaces. Both expand to an expression that reads m_uWidth,
// so they can only be used where a name m_uWidth is in scope at the point of expansion.
// ROUNDUP is not defined in the visible part of this file.
// NOTE(review): presumably ROUNDUP(x, 256) rounds x up to a multiple of 256 (matching the
// D3D12 texture-data pitch alignment), with 1 byte per NV12 luma sample and 4 bytes per
// ARGB pixel - confirm against the ROUNDUP definition and the callers.
#define TEST_NV12_PITCH     ROUNDUP(m_uWidth, 256)
#define TEST_ARGB_PITCH     ROUNDUP(m_uWidth*4, 256)




// Creates the D3D12 device plus everything the video-processing path needs:
// the direct queue/allocator/command list, the video-process queue/allocator/command list,
// the predication buffer, the write-immediate buffer (only when the device reports support)
// and the allocator sync object.
//
// pCDx1xCommon - helper used to look up the DXGI adapter for hWndDisplay.
// gpuIndex     - stored in m_gpuIndex; m_gpuMask becomes (1 << gpuIndex).
// hWndDisplay  - window handed to GetDxgiAdapterForWnd.
//
// Returns S_OK on success. Returns E_INVALIDARG for a null pCDx1xCommon or a gpuIndex that
// does not fit a 32-bit mask, E_FAIL when no adapter is found, otherwise the first failing
// HRESULT. Members created before a failure stay set; ShutDown() is the routine that
// releases them.
HRESULT CDx12Api::Init(CDx1xCommon* pCDx1xCommon, UINT gpuIndex, HWND hWndDisplay)
{
    if (!pCDx1xCommon) return E_INVALIDARG;
    if (gpuIndex >= 32) return E_INVALIDARG;    // 1 << gpuIndex would overflow the mask

    // Size of the small helper buffers (predication / write-immediate).
    const UINT64 kHelperBufferBytes = 0x1000;

    HRESULT hr = S_OK;

    m_gpuIndex = gpuIndex;
    m_gpuMask = 1u << m_gpuIndex;

    // NOTE(review): the ownership contract of GetDxgiAdapterForWnd is not visible here.
    // If it returns an AddRef'd adapter, nothing in this function releases it - confirm.
    IDXGIAdapter4* pAdapter = pCDx1xCommon->GetDxgiAdapterForWnd(hWndDisplay);
    if (!pAdapter) return E_FAIL;

    hr = D3D12CreateDevice(pAdapter, D3D_FEATURE_LEVEL_11_1, IID_PPV_ARGS(&m_pD3D12Device));
    if (FAILED(hr)) return hr;

    hr = m_pD3D12Device->QueryInterface(IID_PPV_ARGS(&m_pD3D12VideoDevice));
    if (FAILED(hr)) return hr;

    // Direct (graphics) queue, allocator and command list.
    D3D12_COMMAND_QUEUE_DESC directQueueDesc = {};
    directQueueDesc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE;
    directQueueDesc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT;

    hr = m_pD3D12Device->CreateCommandQueue(&directQueueDesc, IID_PPV_ARGS(&m_pCommandQueue));
    if (FAILED(hr)) return hr;
    hr = m_pD3D12Device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&m_pCommandAllocator));
    if (FAILED(hr)) return hr;
    hr = m_pD3D12Device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, m_pCommandAllocator, nullptr, IID_PPV_ARGS(&m_pCommandList));
    if (FAILED(hr)) return hr;
    // Command lists are created in the recording state; close so it can be Reset below.
    hr = m_pCommandList->Close();
    if (FAILED(hr)) return hr;

    // Video-process queue, allocator and command list (all on m_gpuMask).
    D3D12_COMMAND_QUEUE_DESC videoQueueDesc = {};
    videoQueueDesc.Type = D3D12_COMMAND_LIST_TYPE_VIDEO_PROCESS;
    videoQueueDesc.Priority = 0;
    videoQueueDesc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE;
    videoQueueDesc.NodeMask = m_gpuMask;

    hr = m_pD3D12Device->CreateCommandQueue(&videoQueueDesc, IID_PPV_ARGS(&m_pVideoCommandQueue));
    if (FAILED(hr)) return hr;
    hr = m_pD3D12Device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_VIDEO_PROCESS, IID_PPV_ARGS(&m_pVideoCommandAllocator));
    if (FAILED(hr)) return hr;
    hr = m_pD3D12Device->CreateCommandList(m_gpuMask, D3D12_COMMAND_LIST_TYPE_VIDEO_PROCESS, m_pVideoCommandAllocator, nullptr, IID_PPV_ARGS(&m_pVideoProcessCommandList));
    if (FAILED(hr)) return hr;
    hr = m_pVideoProcessCommandList->Close();
    if (FAILED(hr)) return hr;

    // VpBltHd() calls ProcessFrames1 through this interface unconditionally, so a missing
    // interface has to fail initialisation instead of being silently ignored.
    hr = m_pVideoProcessCommandList->QueryInterface(IID_PPV_ARGS(&m_pVideoProcessCommandList1));
    if (FAILED(hr)) return hr;

    hr = m_pCommandAllocator->Reset();
    if (FAILED(hr)) return hr;
    hr = m_pCommandList->Reset(m_pCommandAllocator, nullptr);
    if (FAILED(hr)) return hr;

    // Predication buffer: CPU-writable buffer whose first 64-bit word is set to 1.
    hr = CreateVideoTexture(kHelperBufferBytes, 1, DXGI_FORMAT_UNKNOWN, false, true, false, nullptr, &m_pPredication, m_gpuMask, m_gpuMask);
    if (FAILED(hr)) return hr;

    // Empty read range: the CPU does not read back what it maps here.
    D3D12_RANGE noReadRange = { 0, 0 };
    UINT64* pPredicationBuffer = nullptr;
    hr = m_pPredication->Map(0, &noReadRange, reinterpret_cast<void**>(&pPredicationBuffer));
    if (FAILED(hr)) return hr;
    *pPredicationBuffer = 1;
    m_pPredication->Unmap(0, nullptr);

    // Write-immediate buffer: only created when the device supports WriteBufferImmediate
    // on video-process command lists.
    if (CheckWriteImmediateSupport(m_pD3D12Device))
    {
        hr = CreateVideoTexture(kHelperBufferBytes, 1, DXGI_FORMAT_UNKNOWN, false, false, false, nullptr, &m_pWriteImmediateBuffer, m_gpuMask, m_gpuMask);
        if (FAILED(hr)) return hr;      // was "return FALSE", i.e. S_OK on failure

        unsigned long long* pWriteImmediateData = nullptr;
        hr = m_pWriteImmediateBuffer->Map(0, &noReadRange, reinterpret_cast<void**>(&pWriteImmediateData));
        if (FAILED(hr)) return hr;      // was "return FALSE", i.e. S_OK on failure
        memset(pWriteImmediateData, 0, static_cast<size_t>(kHelperBufferBytes));
        m_pWriteImmediateBuffer->Unmap(0, nullptr);
    }

    // Create allocator sync obj
    m_AllocatorSyncObj = new CDx12SyncObject(m_pD3D12Device);

    // Close and submit the (empty) direct command list.
    hr = m_pCommandList->Close();
    if (FAILED(hr)) return hr;

    ID3D12CommandList* ppCommandLists[] = { m_pCommandList };
    m_pCommandQueue->ExecuteCommandLists(_countof(ppCommandLists), ppCommandLists);

    return hr;
}

// Waits for both queues to go idle (when the sync object and the queue exist),
// then releases every object owned by this class.
void CDx12Api::ShutDown()
{
    // Drain outstanding work: signal the allocator fence on each queue and block
    // on the CPU until it is reached. Video queue first, then the direct queue.
    if (m_AllocatorSyncObj)
    {
        if (m_pVideoCommandQueue)
        {
            m_AllocatorSyncObj->SignalFence(m_pVideoCommandQueue);
            m_AllocatorSyncObj->WaitForCPUFence();
        }

        if (m_pCommandQueue)
        {
            m_AllocatorSyncObj->SignalFence(m_pCommandQueue);
            m_AllocatorSyncObj->WaitForCPUFence();
        }
    }

    // Queues go first ...
    SafeRelease(m_pCommandQueue);
    SafeRelease(m_pVideoCommandQueue);

    // ... then the sync object and the helper buffers ...
    SafeDelete(m_AllocatorSyncObj);
    SafeRelease(m_pWriteImmediateBuffer);
    SafeRelease(m_pPredication);

    // ... then the video-processing objects ...
    SafeRelease(m_pD3D12VideoProcessor);
    SafeRelease(m_pVideoProcessCommandList1);
    SafeRelease(m_pVideoProcessCommandList);
    SafeRelease(m_pVideoCommandAllocator);
    SafeRelease(m_pD3D12VideoDevice);

    // ... and finally the direct command list, its allocator and the device.
    SafeRelease(m_pCommandList);
    SafeRelease(m_pCommandAllocator);
    SafeRelease(m_pD3D12Device);
}


// Creates a video processor for the given input streams / output format and stores it in
// m_pD3D12VideoProcessor.
//
// NumStreams            - number of entries in pVPInputStreamDesc.
// pVPInputStreamDesc    - input stream descriptions; entry 0 also feeds the support query.
// WidthOut, HeightOut   - not read by this function.
// vpOutFormat           - output stream format.
// OutputSpace           - output stream color space.
// BackgroundColor       - four floats copied into the output stream description.
// ppD3D12VideoProcessor - optional; receives the same pointer that is stored in
//                         m_pD3D12VideoProcessor (this function does not AddRef it).
//
// Returns E_INVALIDARG for a null pVPInputStreamDesc/BackgroundColor, E_UNEXPECTED when the
// objects created by Init() are missing, otherwise the HRESULT of the failing D3D12 call
// or S_OK.
//
// NOTE(review): a processor already stored in m_pD3D12VideoProcessor is overwritten without
// being released here; since VpBltHd() accepts a caller-supplied processor, callers may be
// holding earlier ones - confirm the intended ownership before changing this.
HRESULT CDx12Api::VpInit(UINT NumStreams, D3D12_VIDEO_PROCESS_INPUT_STREAM_DESC* pVPInputStreamDesc,
                            UINT WidthOut, UINT HeightOut, DXGI_FORMAT vpOutFormat, DXGI_COLOR_SPACE_TYPE OutputSpace,
                            FLOAT BackgroundColor[4], ID3D12VideoProcessor** ppD3D12VideoProcessor)
{
    if (!pVPInputStreamDesc || !BackgroundColor) return E_INVALIDARG;

    // Everything below relies on objects created by Init().
    if (!m_AllocatorSyncObj || !m_pCommandQueue || !m_pCommandAllocator ||
        !m_pCommandList || !m_pD3D12VideoDevice)
    {
        return E_UNEXPECTED;
    }

    // make sure the allocator is ready
    m_AllocatorSyncObj->SignalFence(m_pCommandQueue);
    m_AllocatorSyncObj->WaitForCPUFence();

    HRESULT hr = m_pCommandAllocator->Reset();
    if (FAILED(hr)) return hr;
    hr = m_pCommandList->Reset(m_pCommandAllocator, nullptr);
    if (FAILED(hr)) return hr;

    // Set up VPBlt output
    D3D12_VIDEO_PROCESS_OUTPUT_STREAM_DESC outputStreamDesc = {};
    outputStreamDesc.Format = vpOutFormat;
    outputStreamDesc.AlphaFillMode = D3D12_VIDEO_PROCESS_ALPHA_FILL_MODE_OPAQUE;
    outputStreamDesc.AlphaFillModeSourceStreamIndex = 0;
    CopyMemory(outputStreamDesc.BackgroundColor, BackgroundColor, sizeof(FLOAT) * 4);
    outputStreamDesc.FrameRate.Numerator = 30;
    outputStreamDesc.FrameRate.Denominator = 1;
    outputStreamDesc.EnableStereo = FALSE;
    outputStreamDesc.ColorSpace = OutputSpace;

    // Support query for the first input stream against the output description.
    // NOTE(review): InputSample.Width/Height are filled from SourceAspectRatio, not from a
    // frame size - confirm this is intended.
    D3D12_FEATURE_DATA_VIDEO_PROCESS_SUPPORT vpFeatures = {};
    vpFeatures.InputSample.Width = pVPInputStreamDesc->SourceAspectRatio.Numerator;
    vpFeatures.InputSample.Height = pVPInputStreamDesc->SourceAspectRatio.Denominator;
    vpFeatures.InputSample.Format.ColorSpace = pVPInputStreamDesc->ColorSpace;
    vpFeatures.InputSample.Format.Format = pVPInputStreamDesc->Format;
    vpFeatures.InputFieldType = D3D12_VIDEO_FIELD_TYPE_NONE;
    vpFeatures.InputStereoFormat = D3D12_VIDEO_FRAME_STEREO_FORMAT_NONE;
    vpFeatures.InputFrameRate = pVPInputStreamDesc->FrameRate;
    vpFeatures.OutputFormat.ColorSpace = outputStreamDesc.ColorSpace;
    vpFeatures.OutputFormat.Format = outputStreamDesc.Format;
    vpFeatures.OutputStereoFormat = D3D12_VIDEO_FRAME_STEREO_FORMAT_NONE;
    vpFeatures.OutputFrameRate = outputStreamDesc.FrameRate;

    // Advisory only: the result has never gated processor creation, so it is deliberately
    // not allowed to fail the call. CreateVideoProcessor below is the authoritative check.
    (void)m_pD3D12VideoDevice->CheckFeatureSupport(D3D12_FEATURE_VIDEO_PROCESS_SUPPORT, &vpFeatures, sizeof(vpFeatures));

    hr = m_pD3D12VideoDevice->CreateVideoProcessor(m_gpuMask, &outputStreamDesc, NumStreams, pVPInputStreamDesc, IID_PPV_ARGS(&m_pD3D12VideoProcessor));
    if (FAILED(hr))
    {
        // The command list was Reset above and is still recording. Close it so the next
        // Reset (here or elsewhere) does not fail; the creation error is what we report.
        m_pCommandList->Close();
        return hr;
    }

    // Close the command list
    hr = m_pCommandList->Close();
    if (FAILED(hr)) return hr;

    // Execute the command list.
    ID3D12CommandList* ppCommandLists[] = { m_pCommandList };
    m_pCommandQueue->ExecuteCommandLists(_countof(ppCommandLists), ppCommandLists);

    if (ppD3D12VideoProcessor)
    {
        *ppD3D12VideoProcessor = m_pD3D12VideoProcessor;
    }
    return hr;
}

// Creates a committed buffer or 2D texture resource.
//
// Width, Height    - resource size. Height == 1 together with DXGI_FORMAT_UNKNOWN selects a
//                    buffer of Width bytes; anything else creates a 2D texture.
// dxgiFormat       - texture format (DXGI_FORMAT_UNKNOWN for buffers).
// bIsHwRT          - true: default heap with ALLOW_UNORDERED_ACCESS.
//                    false: upload or readback heap, chosen by bIsCPUWrite.
// bIsCPUWrite      - only read when bIsHwRT is false: true = upload heap,
//                    false = readback heap (initial state COPY_DEST).
// bIsDecodeSurf    - only read when bIsHwRT is true: forces a shared heap and the
//                    PIXEL_SHADER_RESOURCE initial state.
// pBufferSize      - optional; receives the total byte size reported by
//                    GetCopyableFootprints.
// ppTexture        - receives the created resource. May be null, in which case
//                    CreateCommittedResource only validates the request.
// creationNodeMask, nodeMask - forwarded to the heap properties.
// bIsShared        - requests D3D12_HEAP_FLAG_SHARED.
// pInitState       - optional; receives the initial resource state that was used.
//
// Returns the HRESULT of CreateCommittedResource.
HRESULT CDx12Api::CreateVideoTexture(
    UINT64              Width,
    UINT                Height,
    DXGI_FORMAT         dxgiFormat,
    bool                bIsHwRT,
    bool                bIsCPUWrite,
    bool                bIsDecodeSurf,
    UINT64*             pBufferSize,
    ID3D12Resource**    ppTexture,
    UINT                creationNodeMask,
    UINT                nodeMask,
    bool                bIsShared,
    D3D12_RESOURCE_STATES* pInitState)
{
    // Describe the resource: linear buffer or single-mip, single-slice 2D texture.
    D3D12_RESOURCE_DESC textureDesc = {};
    if (Height == 1 && dxgiFormat == DXGI_FORMAT_UNKNOWN)
    {
        textureDesc = CD3DX12_RESOURCE_DESC::Buffer(Width);
    }
    else
    {
        textureDesc = CD3DX12_RESOURCE_DESC::Tex2D(dxgiFormat, Width, Height, 1, 1, 1, 0, D3D12_RESOURCE_FLAG_NONE);
    }

    // Defaults: default heap, GENERIC_READ, shared only on request.
    D3D12_HEAP_PROPERTIES heapProp = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT, creationNodeMask, nodeMask);
    D3D12_HEAP_FLAGS heapFlags = bIsShared ? D3D12_HEAP_FLAG_SHARED : D3D12_HEAP_FLAG_NONE;
    D3D12_RESOURCE_STATES initState = D3D12_RESOURCE_STATE_GENERIC_READ;

    if (bIsHwRT)
    {
        textureDesc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
        if (bIsDecodeSurf)
        {
            heapFlags = D3D12_HEAP_FLAG_SHARED;
            initState = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE;
        }
    }
    else if (bIsCPUWrite)
    {
        heapProp = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD, creationNodeMask, nodeMask);
    }
    else
    {
        heapProp = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_READBACK, creationNodeMask, nodeMask);
        initState = D3D12_RESOURCE_STATE_COPY_DEST;
    }

    HRESULT hr = m_pD3D12Device->CreateCommittedResource(
                                    &heapProp,
                                    heapFlags,
                                    &textureDesc,
                                    initState,
                                    nullptr,
                                    IID_PPV_ARGS(ppTexture));
    if (FAILED(hr)) return hr;

    if (pBufferSize)
    {
        // NOTE(review): for planar formats each plane is its own subresource, so this count
        // presumably covers only the first plane - confirm against the callers' expectations.
        const UINT subresourceCount = textureDesc.DepthOrArraySize * textureDesc.MipLevels;

        // With a null ppTexture no object exists (the call above was only a validity
        // check), so fall back to the requested description instead of dereferencing it.
        const D3D12_RESOURCE_DESC footprintDesc =
            (ppTexture && *ppTexture) ? (*ppTexture)->GetDesc() : textureDesc;
        m_pD3D12Device->GetCopyableFootprints(&footprintDesc, 0, subresourceCount, 0, nullptr, nullptr, nullptr, pBufferSize);
    }
    if (pInitState)
    {
        *pInitState = initState;
    }

    return hr;
}


////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Execute the processFrames
// Records one ProcessFrames1 call on the video-process command list and submits it.
//
// pOutputStream        - output arguments; OutputStream[0] is transitioned
//                        COMMON -> VIDEO_PROCESS_WRITE -> COMMON around the call.
// pDstSyncObj          - the video queue waits on this fence before the work and signals it
//                        after submission.
// pVPInputStreams      - nInputStreamCount input argument blocks; InputStream[0] of each is
//                        transitioned COMMON -> VIDEO_PROCESS_READ -> COMMON.
// nInputStreamCount    - number of entries in pVPInputStreams.
// pD3D12VideoProcessor - processor to use; null selects m_pD3D12VideoProcessor.
//
// Returns E_INVALIDARG for missing arguments, E_UNEXPECTED when the objects created by
// Init()/VpInit() are missing, the failing HRESULT of a Reset/Close call, or S_OK.
HRESULT CDx12Api::VpBltHd(D3D12_VIDEO_PROCESS_OUTPUT_STREAM_ARGUMENTS* pOutputStream,
                            CDx12SyncObject* pDstSyncObj,
                            D3D12_VIDEO_PROCESS_INPUT_STREAM_ARGUMENTS1* pVPInputStreams,
                            UINT nInputStreamCount,
                            ID3D12VideoProcessor* pD3D12VideoProcessor)
{
    // Validate everything before touching a fence or the command list.
    if (!pOutputStream || !pDstSyncObj) return E_INVALIDARG;
    if (nInputStreamCount > 0 && !pVPInputStreams) return E_INVALIDARG;

    if (!m_AllocatorSyncObj || !m_pVideoCommandQueue || !m_pVideoCommandAllocator ||
        !m_pVideoProcessCommandList || !m_pVideoProcessCommandList1)
    {
        return E_UNEXPECTED;
    }

    if (!pD3D12VideoProcessor)
    {
        pD3D12VideoProcessor = m_pD3D12VideoProcessor;
    }
    if (!pD3D12VideoProcessor) return E_UNEXPECTED;    // VpInit() has not produced a processor

    pDstSyncObj->WaitForFence(m_pVideoCommandQueue);

    // make sure the allocator is ready
    m_AllocatorSyncObj->SignalFence(m_pVideoCommandQueue);
    m_AllocatorSyncObj->WaitForCPUFence();

    HRESULT hr = m_pVideoCommandAllocator->Reset();
    if (FAILED(hr)) return hr;
    hr = m_pVideoProcessCommandList->Reset(m_pVideoCommandAllocator);
    if (FAILED(hr)) return hr;

    // Records a single transition barrier on the video-process command list.
    auto recordTransition = [this](ID3D12Resource* pResource, UINT subresource,
                                   D3D12_RESOURCE_STATES stateBefore, D3D12_RESOURCE_STATES stateAfter)
    {
        CD3DX12_RESOURCE_BARRIER barrier =
            CD3DX12_RESOURCE_BARRIER::Transition(pResource, stateBefore, stateAfter, subresource);
        m_pVideoProcessCommandList->ResourceBarrier(1, &barrier);
    };

    // Indicate the inputs are vp read
    for (UINT n = 0; n < nInputStreamCount; n++)
    {
        recordTransition(pVPInputStreams[n].InputStream[0].pTexture2D,
                         pVPInputStreams[n].InputStream[0].Subresource,
                         D3D12_RESOURCE_STATE_COMMON,
                         D3D12_RESOURCE_STATE_VIDEO_PROCESS_READ);
    }

    // Indicate output is a vp render target.
    recordTransition(pOutputStream->OutputStream[0].pTexture2D,
                     pOutputStream->OutputStream[0].Subresource,
                     D3D12_RESOURCE_STATE_COMMON,
                     D3D12_RESOURCE_STATE_VIDEO_PROCESS_WRITE);

    m_pVideoProcessCommandList->SetPredication(m_pPredication, 0, D3D12_PREDICATION_OP_EQUAL_ZERO);

    m_pVideoProcessCommandList1->ProcessFrames1(pD3D12VideoProcessor, pOutputStream, nInputStreamCount, pVPInputStreams);

    // Use Immediate to track progress: write the incremented frame counter into the
    // write-immediate buffer (only present when Init() created it).
    if (m_pWriteImmediateBuffer)
    {
        D3D12_WRITEBUFFERIMMEDIATE_PARAMETER writeImmediateParam;
        D3D12_WRITEBUFFERIMMEDIATE_MODE writeMode = D3D12_WRITEBUFFERIMMEDIATE_MODE_DEFAULT;
        writeImmediateParam.Dest = m_pWriteImmediateBuffer->GetGPUVirtualAddress();
        writeImmediateParam.Value = ++m_nFrameCount;

        m_pVideoProcessCommandList->WriteBufferImmediate(1, &writeImmediateParam, &writeMode);
    }

    // transition inputs back to common
    for (UINT n = 0; n < nInputStreamCount; n++)
    {
        recordTransition(pVPInputStreams[n].InputStream[0].pTexture2D,
                         pVPInputStreams[n].InputStream[0].Subresource,
                         D3D12_RESOURCE_STATE_VIDEO_PROCESS_READ,
                         D3D12_RESOURCE_STATE_COMMON);
    }

    // transition output back to common
    recordTransition(pOutputStream->OutputStream[0].pTexture2D,
                     pOutputStream->OutputStream[0].Subresource,
                     D3D12_RESOURCE_STATE_VIDEO_PROCESS_WRITE,
                     D3D12_RESOURCE_STATE_COMMON);

    // close command list
    hr = m_pVideoProcessCommandList->Close();
    if (FAILED(hr)) return hr;

    // Execute the command list.
    ID3D12CommandList* ppCommandLists[] = { m_pVideoProcessCommandList };
    m_pVideoCommandQueue->ExecuteCommandLists(_countof(ppCommandLists), ppCommandLists);

    pDstSyncObj->SignalFence(m_pVideoCommandQueue);

    return S_OK;
}

////////////////////////////////////////////////////////////////////////////////////////////
// Reports whether pD3D12Device supports WriteBufferImmediate on video-process command lists.
// Returns false when the D3D12_OPTIONS3 feature query itself fails.
bool CDx12Api::CheckWriteImmediateSupport(ID3D12Device* pD3D12Device)
{
    D3D12_FEATURE_DATA_D3D12_OPTIONS3 featureData = {};

    const HRESULT hrQuery = pD3D12Device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS3, &featureData, sizeof(featureData));
    if (FAILED(hrQuery))
    {
        return false;
    }

    return (featureData.WriteBufferImmediateSupportFlags & D3D12_COMMAND_LIST_SUPPORT_FLAG_VIDEO_PROCESS) != 0;
}
