mirror of
https://github.com/NVIDIA/cuda-samples.git
synced 2024-12-01 10:59:16 +08:00
510 lines
19 KiB
C++
510 lines
19 KiB
C++
|
/*
|
||
|
* Copyright 1993-2018 NVIDIA Corporation. All rights reserved.
|
||
|
*
|
||
|
* Please refer to the NVIDIA end user license agreement (EULA) associated
|
||
|
* with this source code for terms and conditions that govern your use of
|
||
|
* this software. Any use, reproduction, disclosure, or distribution of
|
||
|
* this software and related documentation outside the terms of the EULA
|
||
|
* is strictly prohibited.
|
||
|
*
|
||
|
*/
|
||
|
|
||
|
|
||
|
#include "stdafx.h"
|
||
|
#include "simpleD3D12.h"
|
||
|
#include <aclapi.h>
|
||
|
|
||
|
//////////////////////////////////////////////
|
||
|
// WindowsSecurityAttributes implementation //
|
||
|
//////////////////////////////////////////////
|
||
|
|
||
|
class WindowsSecurityAttributes {
|
||
|
protected:
|
||
|
SECURITY_ATTRIBUTES m_winSecurityAttributes;
|
||
|
PSECURITY_DESCRIPTOR m_winPSecurityDescriptor;
|
||
|
|
||
|
public:
|
||
|
WindowsSecurityAttributes();
|
||
|
~WindowsSecurityAttributes();
|
||
|
SECURITY_ATTRIBUTES * operator&();
|
||
|
};
|
||
|
|
||
|
WindowsSecurityAttributes::WindowsSecurityAttributes()
|
||
|
{
|
||
|
m_winPSecurityDescriptor = (PSECURITY_DESCRIPTOR)calloc(1, SECURITY_DESCRIPTOR_MIN_LENGTH + 2 * sizeof(void**));
|
||
|
assert(m_winPSecurityDescriptor != (PSECURITY_DESCRIPTOR)NULL);
|
||
|
|
||
|
PSID *ppSID = (PSID *)((PBYTE)m_winPSecurityDescriptor + SECURITY_DESCRIPTOR_MIN_LENGTH);
|
||
|
PACL *ppACL = (PACL *)((PBYTE)ppSID + sizeof(PSID *));
|
||
|
|
||
|
InitializeSecurityDescriptor(m_winPSecurityDescriptor, SECURITY_DESCRIPTOR_REVISION);
|
||
|
|
||
|
SID_IDENTIFIER_AUTHORITY sidIdentifierAuthority = SECURITY_WORLD_SID_AUTHORITY;
|
||
|
AllocateAndInitializeSid(&sidIdentifierAuthority, 1, SECURITY_WORLD_RID, 0, 0, 0, 0, 0, 0, 0, ppSID);
|
||
|
|
||
|
EXPLICIT_ACCESS explicitAccess;
|
||
|
ZeroMemory(&explicitAccess, sizeof(EXPLICIT_ACCESS));
|
||
|
explicitAccess.grfAccessPermissions = STANDARD_RIGHTS_ALL | SPECIFIC_RIGHTS_ALL;
|
||
|
explicitAccess.grfAccessMode = SET_ACCESS;
|
||
|
explicitAccess.grfInheritance = INHERIT_ONLY;
|
||
|
explicitAccess.Trustee.TrusteeForm = TRUSTEE_IS_SID;
|
||
|
explicitAccess.Trustee.TrusteeType = TRUSTEE_IS_WELL_KNOWN_GROUP;
|
||
|
explicitAccess.Trustee.ptstrName = (LPTSTR)*ppSID;
|
||
|
|
||
|
SetEntriesInAcl(1, &explicitAccess, NULL, ppACL);
|
||
|
|
||
|
SetSecurityDescriptorDacl(m_winPSecurityDescriptor, TRUE, *ppACL, FALSE);
|
||
|
|
||
|
m_winSecurityAttributes.nLength = sizeof(m_winSecurityAttributes);
|
||
|
m_winSecurityAttributes.lpSecurityDescriptor = m_winPSecurityDescriptor;
|
||
|
m_winSecurityAttributes.bInheritHandle = TRUE;
|
||
|
}
|
||
|
|
||
|
WindowsSecurityAttributes::~WindowsSecurityAttributes()
|
||
|
{
|
||
|
PSID* ppSID = (PSID*)((PBYTE)m_winPSecurityDescriptor + SECURITY_DESCRIPTOR_MIN_LENGTH);
|
||
|
PACL* ppACL = (PACL*)((PBYTE)ppSID + sizeof(PSID*));
|
||
|
|
||
|
if (*ppSID) {
|
||
|
FreeSid(*ppSID);
|
||
|
}
|
||
|
if (*ppACL) {
|
||
|
LocalFree(*ppACL);
|
||
|
}
|
||
|
free(m_winPSecurityDescriptor);
|
||
|
}
|
||
|
|
||
|
SECURITY_ATTRIBUTES *
|
||
|
WindowsSecurityAttributes::operator&()
|
||
|
{
|
||
|
return &m_winSecurityAttributes;
|
||
|
}
|
||
|
|
||
|
DX12CudaInterop::DX12CudaInterop(UINT width, UINT height, std::string name) :
|
||
|
DX12CudaSample(width, height, name),
|
||
|
m_frameIndex(0),
|
||
|
m_scissorRect(0, 0, static_cast<LONG>(width), static_cast<LONG>(height)),
|
||
|
m_fenceValues{},
|
||
|
m_rtvDescriptorSize(0)
|
||
|
{
|
||
|
m_viewport = { 0.0f, 0.0f, static_cast<float>(width), static_cast<float>(height) };
|
||
|
m_AnimTime = 1.0f;
|
||
|
}
|
||
|
|
||
|
void DX12CudaInterop::OnInit()
|
||
|
{
|
||
|
LoadPipeline();
|
||
|
InitCuda();
|
||
|
LoadAssets();
|
||
|
}
|
||
|
|
||
|
// Load the rendering pipeline dependencies.
|
||
|
void DX12CudaInterop::LoadPipeline()
|
||
|
{
|
||
|
UINT dxgiFactoryFlags = 0;
|
||
|
|
||
|
#if defined(_DEBUG)
|
||
|
// Enable the debug layer (requires the Graphics Tools "optional feature").
|
||
|
// NOTE: Enabling the debug layer after device creation will invalidate the active device.
|
||
|
{
|
||
|
ComPtr<ID3D12Debug> debugController;
|
||
|
if (SUCCEEDED(D3D12GetDebugInterface(IID_PPV_ARGS(&debugController))))
|
||
|
{
|
||
|
debugController->EnableDebugLayer();
|
||
|
|
||
|
// Enable additional debug layers.
|
||
|
dxgiFactoryFlags |= DXGI_CREATE_FACTORY_DEBUG;
|
||
|
}
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
ComPtr<IDXGIFactory4> factory;
|
||
|
ThrowIfFailed(CreateDXGIFactory2(dxgiFactoryFlags, IID_PPV_ARGS(&factory)));
|
||
|
|
||
|
if (m_useWarpDevice)
|
||
|
{
|
||
|
ComPtr<IDXGIAdapter> warpAdapter;
|
||
|
ThrowIfFailed(factory->EnumWarpAdapter(IID_PPV_ARGS(&warpAdapter)));
|
||
|
|
||
|
ThrowIfFailed(D3D12CreateDevice(
|
||
|
warpAdapter.Get(),
|
||
|
D3D_FEATURE_LEVEL_11_0,
|
||
|
IID_PPV_ARGS(&m_device)
|
||
|
));
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
ComPtr<IDXGIAdapter1> hardwareAdapter;
|
||
|
GetHardwareAdapter(factory.Get(), &hardwareAdapter);
|
||
|
|
||
|
ThrowIfFailed(D3D12CreateDevice(
|
||
|
hardwareAdapter.Get(),
|
||
|
D3D_FEATURE_LEVEL_11_0,
|
||
|
IID_PPV_ARGS(&m_device)
|
||
|
));
|
||
|
DXGI_ADAPTER_DESC1 desc;
|
||
|
hardwareAdapter->GetDesc1(&desc);
|
||
|
m_dx12deviceluid = desc.AdapterLuid;
|
||
|
}
|
||
|
|
||
|
// Describe and create the command queue.
|
||
|
D3D12_COMMAND_QUEUE_DESC queueDesc = {};
|
||
|
queueDesc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE;
|
||
|
queueDesc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT;
|
||
|
|
||
|
ThrowIfFailed(m_device->CreateCommandQueue(&queueDesc, IID_PPV_ARGS(&m_commandQueue)));
|
||
|
|
||
|
// Describe and create the swap chain.
|
||
|
DXGI_SWAP_CHAIN_DESC1 swapChainDesc = {};
|
||
|
swapChainDesc.BufferCount = FrameCount;
|
||
|
swapChainDesc.Width = m_width;
|
||
|
swapChainDesc.Height = m_height;
|
||
|
swapChainDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
|
||
|
swapChainDesc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT;
|
||
|
swapChainDesc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD;
|
||
|
swapChainDesc.SampleDesc.Count = 1;
|
||
|
|
||
|
ComPtr<IDXGISwapChain1> swapChain;
|
||
|
ThrowIfFailed(factory->CreateSwapChainForHwnd(
|
||
|
m_commandQueue.Get(), // Swap chain needs the queue so that it can force a flush on it.
|
||
|
Win32Application::GetHwnd(),
|
||
|
&swapChainDesc,
|
||
|
nullptr,
|
||
|
nullptr,
|
||
|
&swapChain
|
||
|
));
|
||
|
|
||
|
// This sample does not support fullscreen transitions.
|
||
|
ThrowIfFailed(factory->MakeWindowAssociation(Win32Application::GetHwnd(), DXGI_MWA_NO_ALT_ENTER));
|
||
|
|
||
|
ThrowIfFailed(swapChain.As(&m_swapChain));
|
||
|
m_frameIndex = m_swapChain->GetCurrentBackBufferIndex();
|
||
|
|
||
|
// Create descriptor heaps.
|
||
|
{
|
||
|
// Describe and create a render target view (RTV) descriptor heap.
|
||
|
D3D12_DESCRIPTOR_HEAP_DESC rtvHeapDesc = {};
|
||
|
rtvHeapDesc.NumDescriptors = FrameCount;
|
||
|
rtvHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_RTV;
|
||
|
rtvHeapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE;
|
||
|
ThrowIfFailed(m_device->CreateDescriptorHeap(&rtvHeapDesc, IID_PPV_ARGS(&m_rtvHeap)));
|
||
|
|
||
|
m_rtvDescriptorSize = m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_RTV);
|
||
|
}
|
||
|
|
||
|
// Create frame resources.
|
||
|
{
|
||
|
CD3DX12_CPU_DESCRIPTOR_HANDLE rtvHandle(m_rtvHeap->GetCPUDescriptorHandleForHeapStart());
|
||
|
|
||
|
// Create a RTV and a command allocator for each frame.
|
||
|
for (UINT n = 0; n < FrameCount; n++)
|
||
|
{
|
||
|
ThrowIfFailed(m_swapChain->GetBuffer(n, IID_PPV_ARGS(&m_renderTargets[n])));
|
||
|
m_device->CreateRenderTargetView(m_renderTargets[n].Get(), nullptr, rtvHandle);
|
||
|
rtvHandle.Offset(1, m_rtvDescriptorSize);
|
||
|
|
||
|
ThrowIfFailed(m_device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&m_commandAllocators[n])));
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void DX12CudaInterop::InitCuda()
|
||
|
{
|
||
|
int num_cuda_devices = 0;
|
||
|
checkCudaErrors(cudaGetDeviceCount(&num_cuda_devices));
|
||
|
|
||
|
if (!num_cuda_devices)
|
||
|
{
|
||
|
throw std::exception("No CUDA Devices found");
|
||
|
}
|
||
|
for (UINT devId = 0; devId < num_cuda_devices; devId++)
|
||
|
{
|
||
|
cudaDeviceProp devProp;
|
||
|
checkCudaErrors(cudaGetDeviceProperties(&devProp, devId));
|
||
|
|
||
|
if ((memcmp(&m_dx12deviceluid.LowPart, devProp.luid, sizeof(m_dx12deviceluid.LowPart)) == 0) && (memcmp(&m_dx12deviceluid.HighPart, devProp.luid + sizeof(m_dx12deviceluid.LowPart), sizeof(m_dx12deviceluid.HighPart)) == 0))
|
||
|
{
|
||
|
checkCudaErrors(cudaSetDevice(devId));
|
||
|
m_cudaDeviceID = devId;
|
||
|
m_nodeMask = devProp.luidDeviceNodeMask;
|
||
|
checkCudaErrors(cudaStreamCreate(&m_streamToRun));
|
||
|
printf("CUDA Device Used [%d] %s\n", devId, devProp.name);
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
// Load the sample assets.
|
||
|
void DX12CudaInterop::LoadAssets()
|
||
|
{
|
||
|
// Create a root signature.
|
||
|
{
|
||
|
CD3DX12_DESCRIPTOR_RANGE range;
|
||
|
CD3DX12_ROOT_PARAMETER parameter;
|
||
|
|
||
|
range.Init(D3D12_DESCRIPTOR_RANGE_TYPE_CBV, 1, 0);
|
||
|
parameter.InitAsDescriptorTable(1, &range, D3D12_SHADER_VISIBILITY_VERTEX);
|
||
|
|
||
|
D3D12_ROOT_SIGNATURE_FLAGS rootSignatureFlags =
|
||
|
D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT | // Only the input assembler stage needs access to the constant buffer.
|
||
|
D3D12_ROOT_SIGNATURE_FLAG_DENY_DOMAIN_SHADER_ROOT_ACCESS |
|
||
|
D3D12_ROOT_SIGNATURE_FLAG_DENY_GEOMETRY_SHADER_ROOT_ACCESS |
|
||
|
D3D12_ROOT_SIGNATURE_FLAG_DENY_HULL_SHADER_ROOT_ACCESS |
|
||
|
D3D12_ROOT_SIGNATURE_FLAG_DENY_PIXEL_SHADER_ROOT_ACCESS;
|
||
|
|
||
|
CD3DX12_ROOT_SIGNATURE_DESC descRootSignature;
|
||
|
descRootSignature.Init(1, ¶meter, 0, nullptr, rootSignatureFlags);
|
||
|
ComPtr<ID3DBlob> pSignature;
|
||
|
ComPtr<ID3DBlob> pError;
|
||
|
ThrowIfFailed(D3D12SerializeRootSignature(&descRootSignature, D3D_ROOT_SIGNATURE_VERSION_1, pSignature.GetAddressOf(), pError.GetAddressOf()));
|
||
|
ThrowIfFailed(m_device->CreateRootSignature(0, pSignature->GetBufferPointer(), pSignature->GetBufferSize(), IID_PPV_ARGS(&m_rootSignature)));
|
||
|
}
|
||
|
// Create the pipeline state, which includes compiling and loading shaders.
|
||
|
{
|
||
|
ComPtr<ID3DBlob> vertexShader;
|
||
|
ComPtr<ID3DBlob> pixelShader;
|
||
|
|
||
|
#if defined(_DEBUG)
|
||
|
// Enable better shader debugging with the graphics debugging tools.
|
||
|
UINT compileFlags = D3DCOMPILE_DEBUG | D3DCOMPILE_SKIP_OPTIMIZATION;
|
||
|
#else
|
||
|
UINT compileFlags = 0;
|
||
|
#endif
|
||
|
std::wstring filePath = GetAssetFullPath("shaders.hlsl");
|
||
|
LPCWSTR result = filePath.c_str();
|
||
|
ThrowIfFailed(D3DCompileFromFile(result, nullptr, nullptr, "VSMain", "vs_5_0", compileFlags, 0, &vertexShader, nullptr));
|
||
|
ThrowIfFailed(D3DCompileFromFile(result, nullptr, nullptr, "PSMain", "ps_5_0", compileFlags, 0, &pixelShader, nullptr));
|
||
|
|
||
|
// Define the vertex input layout.
|
||
|
D3D12_INPUT_ELEMENT_DESC inputElementDescs[] =
|
||
|
{
|
||
|
{ "POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 0, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 },
|
||
|
{ "COLOR", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, 12, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 }
|
||
|
};
|
||
|
|
||
|
// Describe and create the graphics pipeline state object (PSO).
|
||
|
D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = {};
|
||
|
psoDesc.InputLayout = { inputElementDescs, _countof(inputElementDescs) };
|
||
|
psoDesc.pRootSignature = m_rootSignature.Get();
|
||
|
psoDesc.VS = CD3DX12_SHADER_BYTECODE(vertexShader.Get());
|
||
|
psoDesc.PS = CD3DX12_SHADER_BYTECODE(pixelShader.Get());
|
||
|
psoDesc.RasterizerState = CD3DX12_RASTERIZER_DESC(D3D12_DEFAULT);
|
||
|
psoDesc.BlendState = CD3DX12_BLEND_DESC(D3D12_DEFAULT);
|
||
|
psoDesc.DepthStencilState = CD3DX12_DEPTH_STENCIL_DESC(D3D12_DEFAULT);
|
||
|
psoDesc.SampleMask = UINT_MAX;
|
||
|
psoDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT;
|
||
|
psoDesc.NumRenderTargets = 1;
|
||
|
psoDesc.RTVFormats[0] = DXGI_FORMAT_R8G8B8A8_UNORM;
|
||
|
psoDesc.SampleDesc.Count = 1;
|
||
|
ThrowIfFailed(m_device->CreateGraphicsPipelineState(&psoDesc, IID_PPV_ARGS(&m_pipelineState)));
|
||
|
}
|
||
|
|
||
|
// Create the command list.
|
||
|
ThrowIfFailed(m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, m_commandAllocators[m_frameIndex].Get(), m_pipelineState.Get(), IID_PPV_ARGS(&m_commandList)));
|
||
|
|
||
|
// Command lists are created in the recording state, but there is nothing
|
||
|
// to record yet. The main loop expects it to be closed, so close it now.
|
||
|
ThrowIfFailed(m_commandList->Close());
|
||
|
|
||
|
// Create the vertex buffer.
|
||
|
{
|
||
|
// Define the geometry for a triangle.
|
||
|
vertBufWidth = m_width/2;
|
||
|
vertBufHeight = m_height/2;
|
||
|
const UINT vertexBufferSize = sizeof(Vertex)*vertBufWidth*vertBufHeight;
|
||
|
|
||
|
ThrowIfFailed(m_device->CreateCommittedResource(
|
||
|
&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT),
|
||
|
D3D12_HEAP_FLAG_SHARED,
|
||
|
&CD3DX12_RESOURCE_DESC::Buffer(vertexBufferSize),
|
||
|
D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER,
|
||
|
nullptr,
|
||
|
IID_PPV_ARGS(&m_vertexBuffer)));
|
||
|
|
||
|
// Initialize the vertex buffer view.
|
||
|
m_vertexBufferView.BufferLocation = m_vertexBuffer->GetGPUVirtualAddress();
|
||
|
m_vertexBufferView.StrideInBytes = sizeof(Vertex);
|
||
|
m_vertexBufferView.SizeInBytes = vertexBufferSize;
|
||
|
|
||
|
HANDLE sharedHandle;
|
||
|
WindowsSecurityAttributes windowsSecurityAttributes;
|
||
|
LPCWSTR name = NULL;
|
||
|
ThrowIfFailed(m_device->CreateSharedHandle(m_vertexBuffer.Get(), &windowsSecurityAttributes, GENERIC_ALL, name, &sharedHandle));
|
||
|
|
||
|
D3D12_RESOURCE_ALLOCATION_INFO d3d12ResourceAllocationInfo;
|
||
|
d3d12ResourceAllocationInfo = m_device->GetResourceAllocationInfo(m_nodeMask, 1, &CD3DX12_RESOURCE_DESC::Buffer(vertexBufferSize));
|
||
|
size_t actualSize = d3d12ResourceAllocationInfo.SizeInBytes;
|
||
|
size_t alignment = d3d12ResourceAllocationInfo.Alignment;
|
||
|
|
||
|
cudaExternalMemoryHandleDesc externalMemoryHandleDesc;
|
||
|
memset(&externalMemoryHandleDesc, 0, sizeof(externalMemoryHandleDesc));
|
||
|
|
||
|
externalMemoryHandleDesc.type = cudaExternalMemoryHandleTypeD3D12Resource;
|
||
|
externalMemoryHandleDesc.handle.win32.handle = sharedHandle;
|
||
|
externalMemoryHandleDesc.size = actualSize;
|
||
|
externalMemoryHandleDesc.flags = cudaExternalMemoryDedicated;
|
||
|
|
||
|
checkCudaErrors(cudaImportExternalMemory(&m_externalMemory, &externalMemoryHandleDesc));
|
||
|
|
||
|
cudaExternalMemoryBufferDesc externalMemoryBufferDesc;
|
||
|
memset(&externalMemoryBufferDesc, 0, sizeof(externalMemoryBufferDesc));
|
||
|
externalMemoryBufferDesc.offset = 0;
|
||
|
externalMemoryBufferDesc.size = vertexBufferSize;
|
||
|
externalMemoryBufferDesc.flags = 0;
|
||
|
|
||
|
checkCudaErrors(cudaExternalMemoryGetMappedBuffer(&m_cudaDevVertptr, m_externalMemory, &externalMemoryBufferDesc));
|
||
|
RunSineWaveKernel(vertBufWidth, vertBufHeight, (Vertex *)m_cudaDevVertptr, m_streamToRun, 1.0f);
|
||
|
checkCudaErrors(cudaStreamSynchronize(m_streamToRun));
|
||
|
|
||
|
}
|
||
|
|
||
|
// Create synchronization objects and wait until assets have been uploaded to the GPU.
|
||
|
{
|
||
|
ThrowIfFailed(m_device->CreateFence(m_fenceValues[m_frameIndex], D3D12_FENCE_FLAG_SHARED, IID_PPV_ARGS(&m_fence)));
|
||
|
|
||
|
cudaExternalSemaphoreHandleDesc externalSemaphoreHandleDesc;
|
||
|
|
||
|
memset(&externalSemaphoreHandleDesc, 0, sizeof(externalSemaphoreHandleDesc));
|
||
|
WindowsSecurityAttributes windowsSecurityAttributes;
|
||
|
LPCWSTR name = NULL;
|
||
|
HANDLE sharedHandle;
|
||
|
externalSemaphoreHandleDesc.type = cudaExternalSemaphoreHandleTypeD3D12Fence;
|
||
|
m_device->CreateSharedHandle(m_fence.Get(), &windowsSecurityAttributes, GENERIC_ALL, name, &sharedHandle);
|
||
|
externalSemaphoreHandleDesc.handle.win32.handle = (void *)sharedHandle;
|
||
|
externalSemaphoreHandleDesc.flags = 0;
|
||
|
|
||
|
checkCudaErrors(cudaImportExternalSemaphore(&m_externalSemaphore, &externalSemaphoreHandleDesc));
|
||
|
|
||
|
m_fenceValues[m_frameIndex]++;
|
||
|
|
||
|
// Create an event handle to use for frame synchronization.
|
||
|
m_fenceEvent = CreateEvent(nullptr, FALSE, FALSE, nullptr);
|
||
|
if (m_fenceEvent == nullptr)
|
||
|
{
|
||
|
ThrowIfFailed(HRESULT_FROM_WIN32(GetLastError()));
|
||
|
}
|
||
|
|
||
|
// Wait for the command list to execute; we are reusing the same command
|
||
|
// list in our main loop but for now, we just want to wait for setup to
|
||
|
// complete before continuing.
|
||
|
WaitForGpu();
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Render the scene.
|
||
|
void DX12CudaInterop::OnRender()
|
||
|
{
|
||
|
// Record all the commands we need to render the scene into the command list.
|
||
|
PopulateCommandList();
|
||
|
|
||
|
// Execute the command list.
|
||
|
ID3D12CommandList* ppCommandLists[] = { m_commandList.Get() };
|
||
|
m_commandQueue->ExecuteCommandLists(_countof(ppCommandLists), ppCommandLists);
|
||
|
|
||
|
// Present the frame.
|
||
|
ThrowIfFailed(m_swapChain->Present(1, 0));
|
||
|
|
||
|
// Schedule a Signal command in the queue.
|
||
|
const UINT64 currentFenceValue = m_fenceValues[m_frameIndex];
|
||
|
ThrowIfFailed(m_commandQueue->Signal(m_fence.Get(), currentFenceValue));
|
||
|
|
||
|
MoveToNextFrame();
|
||
|
}
|
||
|
|
||
|
void DX12CudaInterop::OnDestroy()
|
||
|
{
|
||
|
// Ensure that the GPU is no longer referencing resources that are about to be
|
||
|
// cleaned up by the destructor.
|
||
|
WaitForGpu();
|
||
|
checkCudaErrors(cudaDestroyExternalSemaphore(m_externalSemaphore));
|
||
|
checkCudaErrors(cudaDestroyExternalMemory(m_externalMemory));
|
||
|
CloseHandle(m_fenceEvent);
|
||
|
}
|
||
|
|
||
|
void DX12CudaInterop::PopulateCommandList()
|
||
|
{
|
||
|
// Command list allocators can only be reset when the associated
|
||
|
// command lists have finished execution on the GPU; apps should use
|
||
|
// fences to determine GPU execution progress.
|
||
|
ThrowIfFailed(m_commandAllocators[m_frameIndex]->Reset());
|
||
|
|
||
|
// However, when ExecuteCommandList() is called on a particular command
|
||
|
// list, that command list can then be reset at any time and must be before
|
||
|
// re-recording.
|
||
|
ThrowIfFailed(m_commandList->Reset(m_commandAllocators[m_frameIndex].Get(), m_pipelineState.Get()));
|
||
|
|
||
|
m_commandList->SetGraphicsRootSignature(m_rootSignature.Get());
|
||
|
|
||
|
// Set necessary state.
|
||
|
m_commandList->RSSetViewports(1, &m_viewport);
|
||
|
m_commandList->RSSetScissorRects(1, &m_scissorRect);
|
||
|
|
||
|
// Indicate that the back buffer will be used as a render target.
|
||
|
m_commandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_renderTargets[m_frameIndex].Get(), D3D12_RESOURCE_STATE_PRESENT, D3D12_RESOURCE_STATE_RENDER_TARGET));
|
||
|
|
||
|
CD3DX12_CPU_DESCRIPTOR_HANDLE rtvHandle(m_rtvHeap->GetCPUDescriptorHandleForHeapStart(), m_frameIndex, m_rtvDescriptorSize);
|
||
|
m_commandList->OMSetRenderTargets(1, &rtvHandle, FALSE, nullptr);
|
||
|
|
||
|
// Record commands.
|
||
|
const float clearColor[] = { 0.0f, 0.2f, 0.4f, 1.0f };
|
||
|
m_commandList->ClearRenderTargetView(rtvHandle, clearColor, 0, nullptr);
|
||
|
m_commandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_POINTLIST);
|
||
|
m_commandList->IASetVertexBuffers(0, 1, &m_vertexBufferView);
|
||
|
m_commandList->DrawInstanced(vertBufHeight*vertBufWidth, 1, 0, 0);
|
||
|
|
||
|
// Indicate that the back buffer will now be used to present.
|
||
|
m_commandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_renderTargets[m_frameIndex].Get(), D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_PRESENT));
|
||
|
|
||
|
ThrowIfFailed(m_commandList->Close());
|
||
|
}
|
||
|
|
||
|
// Wait for pending GPU work to complete.
|
||
|
void DX12CudaInterop::WaitForGpu()
|
||
|
{
|
||
|
// Schedule a Signal command in the queue.
|
||
|
ThrowIfFailed(m_commandQueue->Signal(m_fence.Get(), m_fenceValues[m_frameIndex]));
|
||
|
|
||
|
// Wait until the fence has been processed.
|
||
|
ThrowIfFailed(m_fence->SetEventOnCompletion(m_fenceValues[m_frameIndex], m_fenceEvent));
|
||
|
WaitForSingleObjectEx(m_fenceEvent, INFINITE, FALSE);
|
||
|
|
||
|
// Increment the fence value for the current frame.
|
||
|
m_fenceValues[m_frameIndex]++;
|
||
|
}
|
||
|
|
||
|
// Prepare to render the next frame.
|
||
|
void DX12CudaInterop::MoveToNextFrame()
|
||
|
{
|
||
|
const UINT64 currentFenceValue = m_fenceValues[m_frameIndex];
|
||
|
cudaExternalSemaphoreWaitParams externalSemaphoreWaitParams;
|
||
|
memset(&externalSemaphoreWaitParams, 0, sizeof(externalSemaphoreWaitParams));
|
||
|
|
||
|
externalSemaphoreWaitParams.params.fence.value = currentFenceValue;
|
||
|
externalSemaphoreWaitParams.flags = 0;
|
||
|
|
||
|
checkCudaErrors(cudaWaitExternalSemaphoresAsync(&m_externalSemaphore, &externalSemaphoreWaitParams, 1, m_streamToRun));
|
||
|
|
||
|
m_AnimTime += 0.01f;
|
||
|
RunSineWaveKernel(vertBufWidth, vertBufHeight, (Vertex *)m_cudaDevVertptr, m_streamToRun, m_AnimTime);
|
||
|
|
||
|
cudaExternalSemaphoreSignalParams externalSemaphoreSignalParams;
|
||
|
memset(&externalSemaphoreSignalParams, 0, sizeof(externalSemaphoreSignalParams));
|
||
|
m_fenceValues[m_frameIndex] = currentFenceValue + 1;
|
||
|
externalSemaphoreSignalParams.params.fence.value = m_fenceValues[m_frameIndex];
|
||
|
externalSemaphoreSignalParams.flags = 0;
|
||
|
|
||
|
checkCudaErrors(cudaSignalExternalSemaphoresAsync(&m_externalSemaphore, &externalSemaphoreSignalParams, 1, m_streamToRun));
|
||
|
|
||
|
// Update the frame index.
|
||
|
m_frameIndex = m_swapChain->GetCurrentBackBufferIndex();
|
||
|
|
||
|
// If the next frame is not ready to be rendered yet, wait until it is ready.
|
||
|
if (m_fence->GetCompletedValue() < m_fenceValues[m_frameIndex])
|
||
|
{
|
||
|
ThrowIfFailed(m_fence->SetEventOnCompletion(m_fenceValues[m_frameIndex], m_fenceEvent));
|
||
|
WaitForSingleObjectEx(m_fenceEvent, INFINITE, FALSE);
|
||
|
}
|
||
|
|
||
|
// Set the fence value for the next frame.
|
||
|
m_fenceValues[m_frameIndex] = currentFenceValue + 2;
|
||
|
}
|