/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of NVIDIA CORPORATION nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include "d3dx12.h" #include #include #include #include #include "ShaderStructs.h" #include "simpleD3D12.h" #include ////////////////////////////////////////////// // WindowsSecurityAttributes implementation // ////////////////////////////////////////////// class WindowsSecurityAttributes { protected: SECURITY_ATTRIBUTES m_winSecurityAttributes; PSECURITY_DESCRIPTOR m_winPSecurityDescriptor; public: WindowsSecurityAttributes(); ~WindowsSecurityAttributes(); SECURITY_ATTRIBUTES *operator&(); }; WindowsSecurityAttributes::WindowsSecurityAttributes() { m_winPSecurityDescriptor = (PSECURITY_DESCRIPTOR)calloc( 1, SECURITY_DESCRIPTOR_MIN_LENGTH + 2 * sizeof(void **)); assert(m_winPSecurityDescriptor != (PSECURITY_DESCRIPTOR)NULL); PSID *ppSID = (PSID *)((PBYTE)m_winPSecurityDescriptor + SECURITY_DESCRIPTOR_MIN_LENGTH); PACL *ppACL = (PACL *)((PBYTE)ppSID + sizeof(PSID *)); InitializeSecurityDescriptor(m_winPSecurityDescriptor, SECURITY_DESCRIPTOR_REVISION); SID_IDENTIFIER_AUTHORITY sidIdentifierAuthority = SECURITY_WORLD_SID_AUTHORITY; AllocateAndInitializeSid(&sidIdentifierAuthority, 1, SECURITY_WORLD_RID, 0, 0, 0, 0, 0, 0, 0, ppSID); EXPLICIT_ACCESS explicitAccess; ZeroMemory(&explicitAccess, sizeof(EXPLICIT_ACCESS)); explicitAccess.grfAccessPermissions = STANDARD_RIGHTS_ALL | SPECIFIC_RIGHTS_ALL; explicitAccess.grfAccessMode = SET_ACCESS; explicitAccess.grfInheritance = INHERIT_ONLY; explicitAccess.Trustee.TrusteeForm = TRUSTEE_IS_SID; explicitAccess.Trustee.TrusteeType = TRUSTEE_IS_WELL_KNOWN_GROUP; explicitAccess.Trustee.ptstrName = (LPTSTR)*ppSID; SetEntriesInAcl(1, &explicitAccess, NULL, ppACL); SetSecurityDescriptorDacl(m_winPSecurityDescriptor, TRUE, *ppACL, FALSE); m_winSecurityAttributes.nLength = sizeof(m_winSecurityAttributes); m_winSecurityAttributes.lpSecurityDescriptor = m_winPSecurityDescriptor; m_winSecurityAttributes.bInheritHandle = TRUE; } WindowsSecurityAttributes::~WindowsSecurityAttributes() { PSID *ppSID = (PSID *)((PBYTE)m_winPSecurityDescriptor + SECURITY_DESCRIPTOR_MIN_LENGTH); PACL *ppACL = (PACL *)((PBYTE)ppSID + sizeof(PSID *)); if (*ppSID) { FreeSid(*ppSID); } if (*ppACL) { LocalFree(*ppACL); } free(m_winPSecurityDescriptor); } SECURITY_ATTRIBUTES *WindowsSecurityAttributes::operator&() { return &m_winSecurityAttributes; } DX12CudaInterop::DX12CudaInterop(UINT width, UINT height, std::string name) : DX12CudaSample(width, height, name), m_frameIndex(0), m_scissorRect(0, 0, static_cast(width), static_cast(height)), m_fenceValues{}, m_rtvDescriptorSize(0) { m_viewport = {0.0f, 0.0f, static_cast(width), static_cast(height)}; m_AnimTime = 1.0f; } void DX12CudaInterop::OnInit() { LoadPipeline(); InitCuda(); LoadAssets(); } // Load the rendering pipeline dependencies. void DX12CudaInterop::LoadPipeline() { UINT dxgiFactoryFlags = 0; #if defined(_DEBUG) // Enable the debug layer (requires the Graphics Tools "optional feature"). // NOTE: Enabling the debug layer after device creation will invalidate the // active device. { ComPtr debugController; if (SUCCEEDED(D3D12GetDebugInterface(IID_PPV_ARGS(&debugController)))) { debugController->EnableDebugLayer(); // Enable additional debug layers. dxgiFactoryFlags |= DXGI_CREATE_FACTORY_DEBUG; } } #endif ComPtr factory; ThrowIfFailed(CreateDXGIFactory2(dxgiFactoryFlags, IID_PPV_ARGS(&factory))); if (m_useWarpDevice) { ComPtr warpAdapter; ThrowIfFailed(factory->EnumWarpAdapter(IID_PPV_ARGS(&warpAdapter))); ThrowIfFailed(D3D12CreateDevice(warpAdapter.Get(), D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&m_device))); } else { ComPtr hardwareAdapter; GetHardwareAdapter(factory.Get(), &hardwareAdapter); ThrowIfFailed(D3D12CreateDevice(hardwareAdapter.Get(), D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&m_device))); DXGI_ADAPTER_DESC1 desc; hardwareAdapter->GetDesc1(&desc); m_dx12deviceluid = desc.AdapterLuid; } // Describe and create the command queue. D3D12_COMMAND_QUEUE_DESC queueDesc = {}; queueDesc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE; queueDesc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT; ThrowIfFailed( m_device->CreateCommandQueue(&queueDesc, IID_PPV_ARGS(&m_commandQueue))); // Describe and create the swap chain. DXGI_SWAP_CHAIN_DESC1 swapChainDesc = {}; swapChainDesc.BufferCount = FrameCount; swapChainDesc.Width = m_width; swapChainDesc.Height = m_height; swapChainDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; swapChainDesc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; swapChainDesc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD; swapChainDesc.SampleDesc.Count = 1; ComPtr swapChain; ThrowIfFailed(factory->CreateSwapChainForHwnd( m_commandQueue.Get(), // Swap chain needs the queue so that it can force // a flush on it. Win32Application::GetHwnd(), &swapChainDesc, nullptr, nullptr, &swapChain)); // This sample does not support fullscreen transitions. ThrowIfFailed(factory->MakeWindowAssociation(Win32Application::GetHwnd(), DXGI_MWA_NO_ALT_ENTER)); ThrowIfFailed(swapChain.As(&m_swapChain)); m_frameIndex = m_swapChain->GetCurrentBackBufferIndex(); // Create descriptor heaps. { // Describe and create a render target view (RTV) descriptor heap. D3D12_DESCRIPTOR_HEAP_DESC rtvHeapDesc = {}; rtvHeapDesc.NumDescriptors = FrameCount; rtvHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_RTV; rtvHeapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE; ThrowIfFailed( m_device->CreateDescriptorHeap(&rtvHeapDesc, IID_PPV_ARGS(&m_rtvHeap))); m_rtvDescriptorSize = m_device->GetDescriptorHandleIncrementSize( D3D12_DESCRIPTOR_HEAP_TYPE_RTV); } // Create frame resources. { CD3DX12_CPU_DESCRIPTOR_HANDLE rtvHandle( m_rtvHeap->GetCPUDescriptorHandleForHeapStart()); // Create a RTV and a command allocator for each frame. for (UINT n = 0; n < FrameCount; n++) { ThrowIfFailed( m_swapChain->GetBuffer(n, IID_PPV_ARGS(&m_renderTargets[n]))); m_device->CreateRenderTargetView(m_renderTargets[n].Get(), nullptr, rtvHandle); rtvHandle.Offset(1, m_rtvDescriptorSize); ThrowIfFailed(m_device->CreateCommandAllocator( D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&m_commandAllocators[n]))); } } } void DX12CudaInterop::InitCuda() { int num_cuda_devices = 0; checkCudaErrors(cudaGetDeviceCount(&num_cuda_devices)); if (!num_cuda_devices) { throw std::exception("No CUDA Devices found"); } for (UINT devId = 0; devId < num_cuda_devices; devId++) { cudaDeviceProp devProp; checkCudaErrors(cudaGetDeviceProperties(&devProp, devId)); if ((memcmp(&m_dx12deviceluid.LowPart, devProp.luid, sizeof(m_dx12deviceluid.LowPart)) == 0) && (memcmp(&m_dx12deviceluid.HighPart, devProp.luid + sizeof(m_dx12deviceluid.LowPart), sizeof(m_dx12deviceluid.HighPart)) == 0)) { checkCudaErrors(cudaSetDevice(devId)); m_cudaDeviceID = devId; m_nodeMask = devProp.luidDeviceNodeMask; checkCudaErrors(cudaStreamCreate(&m_streamToRun)); printf("CUDA Device Used [%d] %s\n", devId, devProp.name); break; } } } // Load the sample assets. void DX12CudaInterop::LoadAssets() { // Create a root signature. { CD3DX12_DESCRIPTOR_RANGE range; CD3DX12_ROOT_PARAMETER parameter; range.Init(D3D12_DESCRIPTOR_RANGE_TYPE_CBV, 1, 0); parameter.InitAsDescriptorTable(1, &range, D3D12_SHADER_VISIBILITY_VERTEX); D3D12_ROOT_SIGNATURE_FLAGS rootSignatureFlags = // Only the input assembler stage needs access to the constant buffer. D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT | D3D12_ROOT_SIGNATURE_FLAG_DENY_DOMAIN_SHADER_ROOT_ACCESS | D3D12_ROOT_SIGNATURE_FLAG_DENY_GEOMETRY_SHADER_ROOT_ACCESS | D3D12_ROOT_SIGNATURE_FLAG_DENY_HULL_SHADER_ROOT_ACCESS | D3D12_ROOT_SIGNATURE_FLAG_DENY_PIXEL_SHADER_ROOT_ACCESS; CD3DX12_ROOT_SIGNATURE_DESC descRootSignature; descRootSignature.Init(1, ¶meter, 0, nullptr, rootSignatureFlags); ComPtr pSignature; ComPtr pError; ThrowIfFailed(D3D12SerializeRootSignature( &descRootSignature, D3D_ROOT_SIGNATURE_VERSION_1, pSignature.GetAddressOf(), pError.GetAddressOf())); ThrowIfFailed(m_device->CreateRootSignature( 0, pSignature->GetBufferPointer(), pSignature->GetBufferSize(), IID_PPV_ARGS(&m_rootSignature))); } // Create the pipeline state, which includes compiling and loading shaders. { ComPtr vertexShader; ComPtr pixelShader; #if defined(_DEBUG) // Enable better shader debugging with the graphics debugging tools. UINT compileFlags = D3DCOMPILE_DEBUG | D3DCOMPILE_SKIP_OPTIMIZATION; #else UINT compileFlags = 0; #endif std::wstring filePath = GetAssetFullPath("shaders.hlsl"); LPCWSTR result = filePath.c_str(); ThrowIfFailed(D3DCompileFromFile(result, nullptr, nullptr, "VSMain", "vs_5_0", compileFlags, 0, &vertexShader, nullptr)); ThrowIfFailed(D3DCompileFromFile(result, nullptr, nullptr, "PSMain", "ps_5_0", compileFlags, 0, &pixelShader, nullptr)); // Define the vertex input layout. D3D12_INPUT_ELEMENT_DESC inputElementDescs[] = { {"POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 0, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0}, {"COLOR", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, 12, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0}}; // Describe and create the graphics pipeline state object (PSO). D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = {}; psoDesc.InputLayout = {inputElementDescs, _countof(inputElementDescs)}; psoDesc.pRootSignature = m_rootSignature.Get(); psoDesc.VS = CD3DX12_SHADER_BYTECODE(vertexShader.Get()); psoDesc.PS = CD3DX12_SHADER_BYTECODE(pixelShader.Get()); psoDesc.RasterizerState = CD3DX12_RASTERIZER_DESC(D3D12_DEFAULT); psoDesc.BlendState = CD3DX12_BLEND_DESC(D3D12_DEFAULT); psoDesc.DepthStencilState = CD3DX12_DEPTH_STENCIL_DESC(D3D12_DEFAULT); psoDesc.SampleMask = UINT_MAX; psoDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT; psoDesc.NumRenderTargets = 1; psoDesc.RTVFormats[0] = DXGI_FORMAT_R8G8B8A8_UNORM; psoDesc.SampleDesc.Count = 1; ThrowIfFailed(m_device->CreateGraphicsPipelineState( &psoDesc, IID_PPV_ARGS(&m_pipelineState))); } // Create the command list. ThrowIfFailed(m_device->CreateCommandList( 0, D3D12_COMMAND_LIST_TYPE_DIRECT, m_commandAllocators[m_frameIndex].Get(), m_pipelineState.Get(), IID_PPV_ARGS(&m_commandList))); // Command lists are created in the recording state, but there is nothing // to record yet. The main loop expects it to be closed, so close it now. ThrowIfFailed(m_commandList->Close()); // Create the vertex buffer. { // Define the geometry for a triangle. vertBufWidth = m_width / 2; vertBufHeight = m_height / 2; const UINT vertexBufferSize = sizeof(Vertex) * vertBufWidth * vertBufHeight; ThrowIfFailed(m_device->CreateCommittedResource( &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT), D3D12_HEAP_FLAG_SHARED, &CD3DX12_RESOURCE_DESC::Buffer(vertexBufferSize), D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER, nullptr, IID_PPV_ARGS(&m_vertexBuffer))); // Initialize the vertex buffer view. m_vertexBufferView.BufferLocation = m_vertexBuffer->GetGPUVirtualAddress(); m_vertexBufferView.StrideInBytes = sizeof(Vertex); m_vertexBufferView.SizeInBytes = vertexBufferSize; HANDLE sharedHandle; WindowsSecurityAttributes windowsSecurityAttributes; LPCWSTR name = NULL; ThrowIfFailed(m_device->CreateSharedHandle( m_vertexBuffer.Get(), &windowsSecurityAttributes, GENERIC_ALL, name, &sharedHandle)); D3D12_RESOURCE_ALLOCATION_INFO d3d12ResourceAllocationInfo; d3d12ResourceAllocationInfo = m_device->GetResourceAllocationInfo( m_nodeMask, 1, &CD3DX12_RESOURCE_DESC::Buffer(vertexBufferSize)); size_t actualSize = d3d12ResourceAllocationInfo.SizeInBytes; size_t alignment = d3d12ResourceAllocationInfo.Alignment; cudaExternalMemoryHandleDesc externalMemoryHandleDesc; memset(&externalMemoryHandleDesc, 0, sizeof(externalMemoryHandleDesc)); externalMemoryHandleDesc.type = cudaExternalMemoryHandleTypeD3D12Resource; externalMemoryHandleDesc.handle.win32.handle = sharedHandle; externalMemoryHandleDesc.size = actualSize; externalMemoryHandleDesc.flags = cudaExternalMemoryDedicated; checkCudaErrors( cudaImportExternalMemory(&m_externalMemory, &externalMemoryHandleDesc)); CloseHandle(sharedHandle); cudaExternalMemoryBufferDesc externalMemoryBufferDesc; memset(&externalMemoryBufferDesc, 0, sizeof(externalMemoryBufferDesc)); externalMemoryBufferDesc.offset = 0; externalMemoryBufferDesc.size = vertexBufferSize; externalMemoryBufferDesc.flags = 0; checkCudaErrors(cudaExternalMemoryGetMappedBuffer( &m_cudaDevVertptr, m_externalMemory, &externalMemoryBufferDesc)); RunSineWaveKernel(vertBufWidth, vertBufHeight, (Vertex *)m_cudaDevVertptr, m_streamToRun, 1.0f); checkCudaErrors(cudaStreamSynchronize(m_streamToRun)); } // Create synchronization objects and wait until assets have been uploaded to // the GPU. { ThrowIfFailed(m_device->CreateFence(m_fenceValues[m_frameIndex], D3D12_FENCE_FLAG_SHARED, IID_PPV_ARGS(&m_fence))); cudaExternalSemaphoreHandleDesc externalSemaphoreHandleDesc; memset(&externalSemaphoreHandleDesc, 0, sizeof(externalSemaphoreHandleDesc)); WindowsSecurityAttributes windowsSecurityAttributes; LPCWSTR name = NULL; HANDLE sharedHandle; externalSemaphoreHandleDesc.type = cudaExternalSemaphoreHandleTypeD3D12Fence; m_device->CreateSharedHandle(m_fence.Get(), &windowsSecurityAttributes, GENERIC_ALL, name, &sharedHandle); externalSemaphoreHandleDesc.handle.win32.handle = (void *)sharedHandle; externalSemaphoreHandleDesc.flags = 0; checkCudaErrors(cudaImportExternalSemaphore(&m_externalSemaphore, &externalSemaphoreHandleDesc)); m_fenceValues[m_frameIndex]++; // Create an event handle to use for frame synchronization. m_fenceEvent = CreateEvent(nullptr, FALSE, FALSE, nullptr); if (m_fenceEvent == nullptr) { ThrowIfFailed(HRESULT_FROM_WIN32(GetLastError())); } // Wait for the command list to execute; we are reusing the same command // list in our main loop but for now, we just want to wait for setup to // complete before continuing. WaitForGpu(); } } // Render the scene. void DX12CudaInterop::OnRender() { // Record all the commands we need to render the scene into the command list. PopulateCommandList(); // Execute the command list. ID3D12CommandList *ppCommandLists[] = {m_commandList.Get()}; m_commandQueue->ExecuteCommandLists(_countof(ppCommandLists), ppCommandLists); // Present the frame. ThrowIfFailed(m_swapChain->Present(1, 0)); // Schedule a Signal command in the queue. const UINT64 currentFenceValue = m_fenceValues[m_frameIndex]; ThrowIfFailed(m_commandQueue->Signal(m_fence.Get(), currentFenceValue)); MoveToNextFrame(); } void DX12CudaInterop::OnDestroy() { // Ensure that the GPU is no longer referencing resources that are about to be // cleaned up by the destructor. WaitForGpu(); checkCudaErrors(cudaDestroyExternalSemaphore(m_externalSemaphore)); checkCudaErrors(cudaDestroyExternalMemory(m_externalMemory)); checkCudaErrors(cudaFree(m_cudaDevVertptr)); CloseHandle(m_fenceEvent); } void DX12CudaInterop::PopulateCommandList() { // Command list allocators can only be reset when the associated // command lists have finished execution on the GPU; apps should use // fences to determine GPU execution progress. ThrowIfFailed(m_commandAllocators[m_frameIndex]->Reset()); // However, when ExecuteCommandList() is called on a particular command // list, that command list can then be reset at any time and must be before // re-recording. ThrowIfFailed(m_commandList->Reset(m_commandAllocators[m_frameIndex].Get(), m_pipelineState.Get())); m_commandList->SetGraphicsRootSignature(m_rootSignature.Get()); // Set necessary state. m_commandList->RSSetViewports(1, &m_viewport); m_commandList->RSSetScissorRects(1, &m_scissorRect); // Indicate that the back buffer will be used as a render target. m_commandList->ResourceBarrier( 1, &CD3DX12_RESOURCE_BARRIER::Transition( m_renderTargets[m_frameIndex].Get(), D3D12_RESOURCE_STATE_PRESENT, D3D12_RESOURCE_STATE_RENDER_TARGET)); CD3DX12_CPU_DESCRIPTOR_HANDLE rtvHandle( m_rtvHeap->GetCPUDescriptorHandleForHeapStart(), m_frameIndex, m_rtvDescriptorSize); m_commandList->OMSetRenderTargets(1, &rtvHandle, FALSE, nullptr); // Record commands. const float clearColor[] = {0.0f, 0.2f, 0.4f, 1.0f}; m_commandList->ClearRenderTargetView(rtvHandle, clearColor, 0, nullptr); m_commandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_POINTLIST); m_commandList->IASetVertexBuffers(0, 1, &m_vertexBufferView); m_commandList->DrawInstanced(vertBufHeight * vertBufWidth, 1, 0, 0); // Indicate that the back buffer will now be used to present. m_commandList->ResourceBarrier( 1, &CD3DX12_RESOURCE_BARRIER::Transition( m_renderTargets[m_frameIndex].Get(), D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_PRESENT)); ThrowIfFailed(m_commandList->Close()); } // Wait for pending GPU work to complete. void DX12CudaInterop::WaitForGpu() { // Schedule a Signal command in the queue. ThrowIfFailed( m_commandQueue->Signal(m_fence.Get(), m_fenceValues[m_frameIndex])); // Wait until the fence has been processed. ThrowIfFailed( m_fence->SetEventOnCompletion(m_fenceValues[m_frameIndex], m_fenceEvent)); WaitForSingleObjectEx(m_fenceEvent, INFINITE, FALSE); // Increment the fence value for the current frame. m_fenceValues[m_frameIndex]++; } // Prepare to render the next frame. void DX12CudaInterop::MoveToNextFrame() { const UINT64 currentFenceValue = m_fenceValues[m_frameIndex]; cudaExternalSemaphoreWaitParams externalSemaphoreWaitParams; memset(&externalSemaphoreWaitParams, 0, sizeof(externalSemaphoreWaitParams)); externalSemaphoreWaitParams.params.fence.value = currentFenceValue; externalSemaphoreWaitParams.flags = 0; checkCudaErrors(cudaWaitExternalSemaphoresAsync( &m_externalSemaphore, &externalSemaphoreWaitParams, 1, m_streamToRun)); m_AnimTime += 0.01f; RunSineWaveKernel(vertBufWidth, vertBufHeight, (Vertex *)m_cudaDevVertptr, m_streamToRun, m_AnimTime); cudaExternalSemaphoreSignalParams externalSemaphoreSignalParams; memset(&externalSemaphoreSignalParams, 0, sizeof(externalSemaphoreSignalParams)); m_fenceValues[m_frameIndex] = currentFenceValue + 1; externalSemaphoreSignalParams.params.fence.value = m_fenceValues[m_frameIndex]; externalSemaphoreSignalParams.flags = 0; checkCudaErrors(cudaSignalExternalSemaphoresAsync( &m_externalSemaphore, &externalSemaphoreSignalParams, 1, m_streamToRun)); // Update the frame index. m_frameIndex = m_swapChain->GetCurrentBackBufferIndex(); // If the next frame is not ready to be rendered yet, wait until it is ready. if (m_fence->GetCompletedValue() < m_fenceValues[m_frameIndex]) { ThrowIfFailed(m_fence->SetEventOnCompletion(m_fenceValues[m_frameIndex], m_fenceEvent)); WaitForSingleObjectEx(m_fenceEvent, INFINITE, FALSE); } // Set the fence value for the next frame. m_fenceValues[m_frameIndex] = currentFenceValue + 2; }