From fcb23487a8a919a88f82bc5b9792e7b30166ccdc Mon Sep 17 00:00:00 2001 From: Mahesh Doijade Date: Thu, 4 Oct 2018 20:05:23 +0530 Subject: [PATCH] -- Add simpleD3D12 sample which demonstrates DX12-CUDA interop rendering sinewave -- Update the documentation with description about this sample --- LICENSE | 3 + README.md | 13 +- Samples/simpleD3D12/DX12CudaSample.cpp | 118 ++ Samples/simpleD3D12/DX12CudaSample.h | 58 + Samples/simpleD3D12/DXSampleHelper.h | 171 ++ Samples/simpleD3D12/Main.cpp | 20 + Samples/simpleD3D12/NsightEclipse.xml | 62 + Samples/simpleD3D12/README.md | 49 + Samples/simpleD3D12/ShaderStructs.h | 36 + Samples/simpleD3D12/Win32Application.cpp | 118 ++ Samples/simpleD3D12/Win32Application.h | 29 + Samples/simpleD3D12/d3dx12.h | 1532 +++++++++++++++++ Samples/simpleD3D12/shaders.hlsl | 34 + Samples/simpleD3D12/simpleD3D12.cpp | 509 ++++++ Samples/simpleD3D12/simpleD3D12.h | 127 ++ Samples/simpleD3D12/simpleD3D12_vs2015.sln | 20 + .../simpleD3D12/simpleD3D12_vs2015.vcxproj | 134 ++ Samples/simpleD3D12/simpleD3D12_vs2017.sln | 20 + .../simpleD3D12/simpleD3D12_vs2017.vcxproj | 135 ++ Samples/simpleD3D12/sinewave_cuda.cu | 55 + Samples/simpleD3D12/stdafx.cpp | 12 + Samples/simpleD3D12/stdafx.h | 33 + 22 files changed, 3284 insertions(+), 4 deletions(-) create mode 100755 Samples/simpleD3D12/DX12CudaSample.cpp create mode 100755 Samples/simpleD3D12/DX12CudaSample.h create mode 100755 Samples/simpleD3D12/DXSampleHelper.h create mode 100755 Samples/simpleD3D12/Main.cpp create mode 100644 Samples/simpleD3D12/NsightEclipse.xml create mode 100644 Samples/simpleD3D12/README.md create mode 100755 Samples/simpleD3D12/ShaderStructs.h create mode 100755 Samples/simpleD3D12/Win32Application.cpp create mode 100755 Samples/simpleD3D12/Win32Application.h create mode 100755 Samples/simpleD3D12/d3dx12.h create mode 100755 Samples/simpleD3D12/shaders.hlsl create mode 100755 Samples/simpleD3D12/simpleD3D12.cpp create mode 100755 Samples/simpleD3D12/simpleD3D12.h create mode 100644 Samples/simpleD3D12/simpleD3D12_vs2015.sln create mode 100644 Samples/simpleD3D12/simpleD3D12_vs2015.vcxproj create mode 100644 Samples/simpleD3D12/simpleD3D12_vs2017.sln create mode 100644 Samples/simpleD3D12/simpleD3D12_vs2017.vcxproj create mode 100755 Samples/simpleD3D12/sinewave_cuda.cu create mode 100755 Samples/simpleD3D12/stdafx.cpp create mode 100755 Samples/simpleD3D12/stdafx.h diff --git a/LICENSE b/LICENSE index afba8ea0..31b222ff 100644 --- a/LICENSE +++ b/LICENSE @@ -23,3 +23,6 @@ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +For additional information on the license terms, see the CUDA EULA at +https://docs.nvidia.com/cuda/eula/index.html diff --git a/README.md b/README.md index ea2a56bc..c6a3e101 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,7 @@ This section describes the release notes for the CUDA Samples on GitHub only. * Added `simpleCudaGraphs`. Demonstrates CUDA Graphs creation, instantiation and launch using Graphs APIs and Stream Capture APIs. * Added `conjugateGradientCudaGraphs`. Demonstrates conjugate gradient solver on GPU using CUBLAS and CUSPARSE library calls captured and called using CUDA Graph APIs. * Added `simpleVulkan`. Demonstrates Vulkan - CUDA Interop. +* Added `simpleD3D12`. Demonstrates DX12 - CUDA Interop. * Added `UnifiedMemoryPerf`. Demonstrates performance comparision of various memory types involved in system. * Added `p2pBandwidthLatencyTest`. Demonstrates Peer-To-Peer (P2P) data transfers between pairs of GPUs and computes latency and bandwidth. * Added `systemWideAtomics`. Demonstrates system wide atomic instructions. @@ -129,10 +130,10 @@ The samples makefiles can take advantage of certain options: #### Windows **[warpAggregatedAtomicsCG](./Samples/warpAggregatedAtomicsCG)** | **[conjugateGradientMultiBlockCG](./Samples/conjugateGradientMultiBlockCG)** | **[shfl_scan](./Samples/shfl_scan)** | **[conjugateGradientCudaGraphs](./Samples/conjugateGradientCudaGraphs)** | ---|---|---|---| -**[simpleCudaGraphs](./Samples/simpleCudaGraphs)** | **[deviceQuery](./Samples/deviceQuery)** | **[simpleVoteIntrinsics](./Samples/simpleVoteIntrinsics)** | **[simpleCUBLASXT](./Samples/simpleCUBLASXT)** | -**[UnifiedMemoryPerf](./Samples/UnifiedMemoryPerf)** | **[conjugateGradientMultiDeviceCG](./Samples/conjugateGradientMultiDeviceCG)** | **[matrixMulDrv](./Samples/matrixMulDrv)** | **[simpleCUFFT](./Samples/simpleCUFFT)** | -**[simpleCUBLAS](./Samples/simpleCUBLAS)** | **[cudaTensorCoreGemm](./Samples/cudaTensorCoreGemm)** | **[simpleVulkan](./Samples/simpleVulkan)** | **[vectorAdd_nvrtc](./Samples/vectorAdd_nvrtc)** | -**[p2pBandwidthLatencyTest](./Samples/p2pBandwidthLatencyTest)** | **[matrixMul](./Samples/matrixMul)** | +**[simpleD3D12](./Samples/simpleD3D12)** | **[simpleCudaGraphs](./Samples/simpleCudaGraphs)** | **[deviceQuery](./Samples/deviceQuery)** | **[simpleVoteIntrinsics](./Samples/simpleVoteIntrinsics)** | +**[simpleCUBLASXT](./Samples/simpleCUBLASXT)** | **[UnifiedMemoryPerf](./Samples/UnifiedMemoryPerf)** | **[conjugateGradientMultiDeviceCG](./Samples/conjugateGradientMultiDeviceCG)** | **[matrixMulDrv](./Samples/matrixMulDrv)** | +**[simpleCUFFT](./Samples/simpleCUFFT)** | **[simpleCUBLAS](./Samples/simpleCUBLAS)** | **[cudaTensorCoreGemm](./Samples/cudaTensorCoreGemm)** | **[simpleVulkan](./Samples/simpleVulkan)** | +**[vectorAdd_nvrtc](./Samples/vectorAdd_nvrtc)** | **[p2pBandwidthLatencyTest](./Samples/p2pBandwidthLatencyTest)** | **[matrixMul](./Samples/matrixMul)** | #### Mac OSX **[warpAggregatedAtomicsCG](./Samples/warpAggregatedAtomicsCG)** | **[shfl_scan](./Samples/shfl_scan)** | **[conjugateGradientCudaGraphs](./Samples/conjugateGradientCudaGraphs)** | **[simpleCudaGraphs](./Samples/simpleCudaGraphs)** | @@ -169,6 +170,10 @@ Some samples can only be run on a 64-bit operating system. DirectX is a collection of APIs designed to allow development of multimedia applications on Microsoft platforms. For Microsoft platforms, NVIDIA's CUDA Driver supports DirectX. Several CUDA Samples for Windows demonstrates CUDA-DirectX Interoperability, for building such samples one needs to install [Direct X SDK (June 2010 or newer)](http://www.microsoft.com/en-us/download/details.aspx?id=6812) , this is required to be installed on Windows 7, Windows 10 and Windows Server 2008, Other Windows OSes do not need to explicitly install the DirectX SDK. +#### DirectX12 + +DirectX 12 is a collection of advanced low-level programming APIs which can reduce driver overhead, designed to allow development of multimedia applications on Microsoft platforms starting with Windows 10 OS onwards. For Microsoft platforms, NVIDIA's CUDA Driver supports DirectX. Few CUDA Samples for Windows demonstrates CUDA-DirectX12 Interoperability, for building such samples one needs to install [Windows 10 SDK or higher](https://developer.microsoft.com/en-us/windows/downloads/windows-10-sdk), with VS 2015 or VS 2017. + #### OpenGL OpenGL is a graphics library used for 2D and 3D rendering. On systems which support OpenGL, NVIDIA's OpenGL implementation is provided with the CUDA Driver. diff --git a/Samples/simpleD3D12/DX12CudaSample.cpp b/Samples/simpleD3D12/DX12CudaSample.cpp new file mode 100755 index 00000000..43f8fa57 --- /dev/null +++ b/Samples/simpleD3D12/DX12CudaSample.cpp @@ -0,0 +1,118 @@ +/* +* Copyright 1993-2018 NVIDIA Corporation. All rights reserved. +* +* Please refer to the NVIDIA end user license agreement (EULA) associated +* with this source code for terms and conditions that govern your use of +* this software. Any use, reproduction, disclosure, or distribution of +* this software and related documentation outside the terms of the EULA +* is strictly prohibited. +* +*/ + +#include "stdafx.h" +#include "DX12CudaSample.h" +#include + +using namespace Microsoft::WRL; + +DX12CudaSample::DX12CudaSample(UINT width, UINT height, std::string name) : + m_width(width), + m_height(height), + m_title(name), + m_useWarpDevice(false) +{ + m_aspectRatio = static_cast(width) / static_cast(height); +} + +DX12CudaSample::~DX12CudaSample() +{ +} + +std::wstring DX12CudaSample::string2wstring(const std::string& s) +{ + int len; + int slength = (int)s.length() + 1; + len = MultiByteToWideChar(CP_ACP, 0, s.c_str(), slength, 0, 0); + wchar_t* buf = new wchar_t[len]; + MultiByteToWideChar(CP_ACP, 0, s.c_str(), slength, buf, len); + std::wstring r(buf); + delete[] buf; + return r; +} +// Helper function for resolving the full path of assets. +std::wstring DX12CudaSample::GetAssetFullPath(const char* assetName) +{ + LPTSTR lpBuffer = new char[4096]; + GetCurrentDirectory(FILENAME_MAX, lpBuffer); + char *tmp = sdkFindFilePath((const char *)assetName, "simpleD3D12"); + if (tmp == NULL) + { + throw std::exception("File not found"); + } + for (int i = 0; i < strlen(tmp); i++) + { + if (tmp[i] == '/') + { + tmp[i] = '\\'; + } + } + m_assetsPath = lpBuffer; + m_assetsPath = m_assetsPath + "\\" + tmp; + + std::wstring stemp = string2wstring(m_assetsPath); + + return stemp; +} + +// Helper function for acquiring the first available hardware adapter that supports Direct3D 12. +// If no such adapter can be found, *ppAdapter will be set to nullptr. +_Use_decl_annotations_ +void DX12CudaSample::GetHardwareAdapter(IDXGIFactory2* pFactory, IDXGIAdapter1** ppAdapter) +{ + ComPtr adapter; + *ppAdapter = nullptr; + + for (UINT adapterIndex = 0; DXGI_ERROR_NOT_FOUND != pFactory->EnumAdapters1(adapterIndex, &adapter); ++adapterIndex) + { + DXGI_ADAPTER_DESC1 desc; + adapter->GetDesc1(&desc); + + if (desc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE) + { + // Don't select the Basic Render Driver adapter. + // If you want a software adapter, pass in "/warp" on the command line. + continue; + } + + // Check to see if the adapter supports Direct3D 12, but don't create the + // actual device yet. + if (SUCCEEDED(D3D12CreateDevice(adapter.Get(), D3D_FEATURE_LEVEL_11_0, _uuidof(ID3D12Device), nullptr))) + { + break; + } + } + + *ppAdapter = adapter.Detach(); +} + +// Helper function for setting the window's title text. +void DX12CudaSample::SetCustomWindowText(const char* text) +{ + std::string windowText = m_title + text; + SetWindowText(Win32Application::GetHwnd(), windowText.c_str()); +} + +// Helper function for parsing any supplied command line args. +_Use_decl_annotations_ +void DX12CudaSample::ParseCommandLineArgs(WCHAR* argv[], int argc) +{ + for (int i = 1; i < argc; ++i) + { + if (_wcsnicmp(argv[i], L"-warp", wcslen(argv[i])) == 0 || + _wcsnicmp(argv[i], L"/warp", wcslen(argv[i])) == 0) + { + m_useWarpDevice = true; + m_title = m_title + " (WARP)"; + } + } +} diff --git a/Samples/simpleD3D12/DX12CudaSample.h b/Samples/simpleD3D12/DX12CudaSample.h new file mode 100755 index 00000000..3feca57d --- /dev/null +++ b/Samples/simpleD3D12/DX12CudaSample.h @@ -0,0 +1,58 @@ +/* +* Copyright 1993-2018 NVIDIA Corporation. All rights reserved. +* +* Please refer to the NVIDIA end user license agreement (EULA) associated +* with this source code for terms and conditions that govern your use of +* this software. Any use, reproduction, disclosure, or distribution of +* this software and related documentation outside the terms of the EULA +* is strictly prohibited. +* +*/ + +#pragma once + +#include "DXSampleHelper.h" +#include "Win32Application.h" + +class DX12CudaSample +{ +public: + DX12CudaSample(UINT width, UINT height, std::string name); + virtual ~DX12CudaSample(); + + virtual void OnInit() = 0; + virtual void OnRender() = 0; + virtual void OnDestroy() = 0; + + // Samples override the event handlers to handle specific messages. + virtual void OnKeyDown(UINT8 /*key*/) {} + virtual void OnKeyUp(UINT8 /*key*/) {} + + // Accessors. + UINT GetWidth() const { return m_width; } + UINT GetHeight() const { return m_height; } + const CHAR* GetTitle() const { return m_title.c_str(); } + + void ParseCommandLineArgs(_In_reads_(argc) WCHAR* argv[], int argc); + +protected: + std::wstring GetAssetFullPath(const char* assetName); + void GetHardwareAdapter(_In_ IDXGIFactory2* pFactory, _Outptr_result_maybenull_ IDXGIAdapter1** ppAdapter); + void SetCustomWindowText(const char* text); + std::wstring string2wstring(const std::string& s); + + // Viewport dimensions. + UINT m_width; + UINT m_height; + float m_aspectRatio; + + // Adapter info. + bool m_useWarpDevice; + +private: + // Root assets path. + std::string m_assetsPath; + + // Window title. + std::string m_title; +}; diff --git a/Samples/simpleD3D12/DXSampleHelper.h b/Samples/simpleD3D12/DXSampleHelper.h new file mode 100755 index 00000000..ff598543 --- /dev/null +++ b/Samples/simpleD3D12/DXSampleHelper.h @@ -0,0 +1,171 @@ +/* +* Copyright 1993-2018 NVIDIA Corporation. All rights reserved. +* +* Please refer to the NVIDIA end user license agreement (EULA) associated +* with this source code for terms and conditions that govern your use of +* this software. Any use, reproduction, disclosure, or distribution of +* this software and related documentation outside the terms of the EULA +* is strictly prohibited. +* +*/ + +#pragma once + + +// Note that while ComPtr is used to manage the lifetime of resources on the CPU, +// it has no understanding of the lifetime of resources on the GPU. Apps must account +// for the GPU lifetime of resources to avoid destroying objects that may still be +// referenced by the GPU. +using Microsoft::WRL::ComPtr; + + +inline std::string HrToString(HRESULT hr) +{ + char s_str[64] = {}; + sprintf_s(s_str, "HRESULT of 0x%08X", static_cast(hr)); + return std::string(s_str); +} + +class HrException : public std::runtime_error +{ +public: + HrException(HRESULT hr) : std::runtime_error(HrToString(hr)), m_hr(hr) {} + HRESULT Error() const { return m_hr; } +private: + const HRESULT m_hr; +}; + +#define SAFE_RELEASE(p) if (p) (p)->Release() + +inline void ThrowIfFailed(HRESULT hr) +{ + if (FAILED(hr)) + { + throw HrException(hr); + } +} + +inline HRESULT ReadDataFromFile(LPCWSTR filename, byte** data, UINT* size) +{ + using namespace Microsoft::WRL; + + CREATEFILE2_EXTENDED_PARAMETERS extendedParams = {}; + extendedParams.dwSize = sizeof(CREATEFILE2_EXTENDED_PARAMETERS); + extendedParams.dwFileAttributes = FILE_ATTRIBUTE_NORMAL; + extendedParams.dwFileFlags = FILE_FLAG_SEQUENTIAL_SCAN; + extendedParams.dwSecurityQosFlags = SECURITY_ANONYMOUS; + extendedParams.lpSecurityAttributes = nullptr; + extendedParams.hTemplateFile = nullptr; + + Wrappers::FileHandle file(CreateFile2(filename, GENERIC_READ, FILE_SHARE_READ, OPEN_EXISTING, &extendedParams)); + if (file.Get() == INVALID_HANDLE_VALUE) + { + throw std::exception(); + } + + FILE_STANDARD_INFO fileInfo = {}; + if (!GetFileInformationByHandleEx(file.Get(), FileStandardInfo, &fileInfo, sizeof(fileInfo))) + { + throw std::exception(); + } + + if (fileInfo.EndOfFile.HighPart != 0) + { + throw std::exception(); + } + + *data = reinterpret_cast(malloc(fileInfo.EndOfFile.LowPart)); + *size = fileInfo.EndOfFile.LowPart; + + if (!ReadFile(file.Get(), *data, fileInfo.EndOfFile.LowPart, nullptr, nullptr)) + { + throw std::exception(); + } + + return S_OK; +} + +// Assign a name to the object to aid with debugging. +#if defined(_DEBUG) || defined(DBG) +inline void SetName(ID3D12Object* pObject, LPCWSTR name) +{ + pObject->SetName(name); +} +inline void SetNameIndexed(ID3D12Object* pObject, LPCWSTR name, UINT index) +{ + WCHAR fullName[50]; + if (swprintf_s(fullName, L"%s[%u]", name, index) > 0) + { + pObject->SetName(fullName); + } +} +#else +inline void SetName(ID3D12Object*, LPCWSTR) +{ +} +inline void SetNameIndexed(ID3D12Object*, LPCWSTR, UINT) +{ +} +#endif + +// Naming helper for ComPtr. +// Assigns the name of the variable as the name of the object. +// The indexed variant will include the index in the name of the object. +#define NAME_D3D12_OBJECT(x) SetName((x).Get(), L#x) +#define NAME_D3D12_OBJECT_INDEXED(x, n) SetNameIndexed((x)[n].Get(), L#x, n) + +inline UINT CalculateConstantBufferByteSize(UINT byteSize) +{ + // Constant buffer size is required to be aligned. + return (byteSize + (D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT - 1)) & ~(D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT - 1); +} + +#ifdef D3D_COMPILE_STANDARD_FILE_INCLUDE +inline Microsoft::WRL::ComPtr CompileShader( + const std::wstring& filename, + const D3D_SHADER_MACRO* defines, + const std::string& entrypoint, + const std::string& target) +{ + UINT compileFlags = 0; +#if defined(_DEBUG) || defined(DBG) + compileFlags = D3DCOMPILE_DEBUG | D3DCOMPILE_SKIP_OPTIMIZATION; +#endif + + HRESULT hr; + + Microsoft::WRL::ComPtr byteCode = nullptr; + Microsoft::WRL::ComPtr errors; + hr = D3DCompileFromFile(filename.c_str(), defines, D3D_COMPILE_STANDARD_FILE_INCLUDE, + entrypoint.c_str(), target.c_str(), compileFlags, 0, &byteCode, &errors); + + if (errors != nullptr) + { + OutputDebugStringA((char*)errors->GetBufferPointer()); + } + ThrowIfFailed(hr); + + return byteCode; +} +#endif + +// Resets all elements in a ComPtr array. +template +void ResetComPtrArray(T* comPtrArray) +{ + for (auto &i : *comPtrArray) + { + i.Reset(); + } +} + + +// Resets all elements in a unique_ptr array. +template +void ResetUniquePtrArray(T* uniquePtrArray) +{ + for (auto &i : *uniquePtrArray) + { + i.reset(); + } +} diff --git a/Samples/simpleD3D12/Main.cpp b/Samples/simpleD3D12/Main.cpp new file mode 100755 index 00000000..0eac0383 --- /dev/null +++ b/Samples/simpleD3D12/Main.cpp @@ -0,0 +1,20 @@ +/* +* Copyright 1993-2018 NVIDIA Corporation. All rights reserved. +* +* Please refer to the NVIDIA end user license agreement (EULA) associated +* with this source code for terms and conditions that govern your use of +* this software. Any use, reproduction, disclosure, or distribution of +* this software and related documentation outside the terms of the EULA +* is strictly prohibited. +* +*/ + +#include "stdafx.h" +#include "simpleD3D12.h" + +_Use_decl_annotations_ +int WINAPI WinMain(HINSTANCE hInstance, HINSTANCE, LPSTR, int nCmdShow) +{ + DX12CudaInterop sample(1280, 720, "D3D12 CUDA Interop"); + return Win32Application::Run(&sample, hInstance, nCmdShow); +} diff --git a/Samples/simpleD3D12/NsightEclipse.xml b/Samples/simpleD3D12/NsightEclipse.xml new file mode 100644 index 00000000..3fdff74f --- /dev/null +++ b/Samples/simpleD3D12/NsightEclipse.xml @@ -0,0 +1,62 @@ + + + + simpleD3D12 + + cudaWaitExternalSemaphoresAsync + cudaSignalExternalSemaphoresAsync + cudaImportExternalSemaphore + cudaExternalMemoryGetMappedBuffer + cudaImportExternalMemory + cudaDestroyExternalSemaphore + cudaDestroyExternalMemory + + + whole + + ./ + ../ + ../../common/inc + + + Graphics Interop + CUDA DX12 Interop + Image Processing + + + D3D + d3d12 + + + + + + true + simpleD3D12.cpp + + DirectX12 + + + 1:CUDA Basic Topics + 2:Graphics Interop + + sm30 + sm35 + sm37 + sm50 + sm52 + sm60 + sm61 + sm70 + sm75 + + + windows10 + + + + all + + Simple D3D12 CUDA Interop + exe + diff --git a/Samples/simpleD3D12/README.md b/Samples/simpleD3D12/README.md new file mode 100644 index 00000000..6e236a72 --- /dev/null +++ b/Samples/simpleD3D12/README.md @@ -0,0 +1,49 @@ +# simpleD3D12 - Simple D3D12 CUDA Interop + +## Description + +A program which demonstrates Direct3D12 interoperability with CUDA. The program creates a sinewave in DX12 vertex buffer which is created using CUDA kernels. DX12 and CUDA synchronizes using DirectX12 Fences. Direct3D then renders the results on the screen. A DirectX12 Capable NVIDIA GPU is required on Windows10 or higher OS. + +## Key Concepts + +Graphics Interop, CUDA DX12 Interop, Image Processing + +## Supported SM Architectures + +[SM 3.0 ](https://developer.nvidia.com/cuda-gpus) [SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) + +## Supported OSes + +Windows + +## Supported CPU Architecture + +x86_64 + +## CUDA APIs involved + +### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) +cudaWaitExternalSemaphoresAsync, cudaSignalExternalSemaphoresAsync, cudaImportExternalSemaphore, cudaExternalMemoryGetMappedBuffer, cudaImportExternalMemory, cudaDestroyExternalSemaphore, cudaDestroyExternalMemory + +## Dependencies needed to build/run +[DirectX12](../../README.md#directx12) + +## Prerequisites + +Download and install the [CUDA Toolkit 10.0](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Make sure the dependencies mentioned in [Dependencies]() section above are installed. + +## Build and Run + +### Windows +The Windows samples are built using the Visual Studio IDE. Solution files (.sln) are provided for each supported version of Visual Studio, using the format: +``` +*_vs.sln - for Visual Studio +``` +Each individual sample has its own set of solution files in its directory: + +To build/examine all the samples at once, the complete solution files should be used. To build/examine a single sample, the individual sample solution files should be used. +> **Note:** Some samples require that the Microsoft DirectX SDK (June 2010 or newer) be installed and that the VC++ directory paths are properly set up (**Tools > Options...**). Check DirectX Dependencies section for details." + +## References (for more details) + diff --git a/Samples/simpleD3D12/ShaderStructs.h b/Samples/simpleD3D12/ShaderStructs.h new file mode 100755 index 00000000..79a71e4b --- /dev/null +++ b/Samples/simpleD3D12/ShaderStructs.h @@ -0,0 +1,36 @@ +/* +* Copyright 1993-2018 NVIDIA Corporation. All rights reserved. +* +* Please refer to the NVIDIA end user license agreement (EULA) associated +* with this source code for terms and conditions that govern your use of +* this software. Any use, reproduction, disclosure, or distribution of +* this software and related documentation outside the terms of the EULA +* is strictly prohibited. +* +*/ + +#pragma once + +#include "stdafx.h" +#include +#include "helper_cuda.h" + +using namespace DirectX; + +struct Vertex +{ + XMFLOAT3 position; + XMFLOAT4 color; +}; + +#if 0 +// Constant buffer used to send MVP matrices to the vertex shader. +struct ModelViewProjectionConstantBuffer +{ + XMFLOAT4X4 model; + XMFLOAT4X4 view; + XMFLOAT4X4 projection; +}; +#endif + +void RunSineWaveKernel(size_t mesh_width, size_t mesh_height, Vertex *cudaDevVertptr, cudaStream_t streamToRun, float AnimTime); \ No newline at end of file diff --git a/Samples/simpleD3D12/Win32Application.cpp b/Samples/simpleD3D12/Win32Application.cpp new file mode 100755 index 00000000..6a551448 --- /dev/null +++ b/Samples/simpleD3D12/Win32Application.cpp @@ -0,0 +1,118 @@ +/* +* Copyright 1993-2018 NVIDIA Corporation. All rights reserved. +* +* Please refer to the NVIDIA end user license agreement (EULA) associated +* with this source code for terms and conditions that govern your use of +* this software. Any use, reproduction, disclosure, or distribution of +* this software and related documentation outside the terms of the EULA +* is strictly prohibited. +* +*/ + +#include "stdafx.h" +#include "Win32Application.h" + +HWND Win32Application::m_hwnd = nullptr; + +int Win32Application::Run(DX12CudaSample* pSample, HINSTANCE hInstance, int nCmdShow) +{ + // Parse the command line parameters + int argc; + LPWSTR* argv = CommandLineToArgvW(GetCommandLineW(), &argc); + pSample->ParseCommandLineArgs(argv, argc); + LocalFree(argv); + + // Initialize the window class. + WNDCLASSEX windowClass = { 0 }; + windowClass.cbSize = sizeof(WNDCLASSEX); + windowClass.style = CS_HREDRAW | CS_VREDRAW; + windowClass.lpfnWndProc = WindowProc; + windowClass.hInstance = hInstance; + windowClass.hCursor = LoadCursor(NULL, IDC_ARROW); + windowClass.lpszClassName = "DX12CudaSampleClass"; + RegisterClassEx(&windowClass); + + RECT windowRect = { 0, 0, static_cast(pSample->GetWidth()), static_cast(pSample->GetHeight()) }; + AdjustWindowRect(&windowRect, WS_OVERLAPPEDWINDOW, FALSE); + + // Create the window and store a handle to it. + m_hwnd = CreateWindow( + windowClass.lpszClassName, + pSample->GetTitle(), + WS_OVERLAPPEDWINDOW, + CW_USEDEFAULT, + CW_USEDEFAULT, + windowRect.right - windowRect.left, + windowRect.bottom - windowRect.top, + nullptr, // We have no parent window. + nullptr, // We aren't using menus. + hInstance, + pSample); + + // Initialize the sample. OnInit is defined in each child-implementation of DXSample. + pSample->OnInit(); + + ShowWindow(m_hwnd, nCmdShow); + + // Main sample loop. + MSG msg = {}; + while (msg.message != WM_QUIT) + { + // Process any messages in the queue. + if (PeekMessage(&msg, NULL, 0, 0, PM_REMOVE)) + { + TranslateMessage(&msg); + DispatchMessage(&msg); + } + } + + pSample->OnDestroy(); + + // Return this part of the WM_QUIT message to Windows. + return static_cast(msg.wParam); +} + +// Main message handler for the sample. +LRESULT CALLBACK Win32Application::WindowProc(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam) +{ + DX12CudaSample* pSample = reinterpret_cast(GetWindowLongPtr(hWnd, GWLP_USERDATA)); + + switch (message) + { + case WM_CREATE: + { + // Save the DXSample* passed in to CreateWindow. + LPCREATESTRUCT pCreateStruct = reinterpret_cast(lParam); + SetWindowLongPtr(hWnd, GWLP_USERDATA, reinterpret_cast(pCreateStruct->lpCreateParams)); + } + return 0; + + case WM_KEYDOWN: + if (pSample) + { + pSample->OnKeyDown(static_cast(wParam)); + } + return 0; + + case WM_KEYUP: + if (pSample) + { + pSample->OnKeyUp(static_cast(wParam)); + } + return 0; + + case WM_PAINT: + if (pSample) + { + pSample->OnRender(); + } + return 0; + + case WM_DESTROY: + PostQuitMessage(0); + return 0; + } + + // Handle any messages the switch statement didn't. + return DefWindowProc(hWnd, message, wParam, lParam); +} diff --git a/Samples/simpleD3D12/Win32Application.h b/Samples/simpleD3D12/Win32Application.h new file mode 100755 index 00000000..d54562d9 --- /dev/null +++ b/Samples/simpleD3D12/Win32Application.h @@ -0,0 +1,29 @@ +/* +* Copyright 1993-2018 NVIDIA Corporation. All rights reserved. +* +* Please refer to the NVIDIA end user license agreement (EULA) associated +* with this source code for terms and conditions that govern your use of +* this software. Any use, reproduction, disclosure, or distribution of +* this software and related documentation outside the terms of the EULA +* is strictly prohibited. +* +*/ + +#pragma once + +#include "DX12CudaSample.h" + +class DX12CudaSample; + +class Win32Application +{ +public: + static int Run(DX12CudaSample* pSample, HINSTANCE hInstance, int nCmdShow); + static HWND GetHwnd() { return m_hwnd; } + +protected: + static LRESULT CALLBACK WindowProc(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam); + +private: + static HWND m_hwnd; +}; diff --git a/Samples/simpleD3D12/d3dx12.h b/Samples/simpleD3D12/d3dx12.h new file mode 100755 index 00000000..dc4608ae --- /dev/null +++ b/Samples/simpleD3D12/d3dx12.h @@ -0,0 +1,1532 @@ +////////////////////////////////////////////////////////////////////////////// +// +// Copyright (C) Microsoft Corporation. All Rights Reserved. +// +// File: d3dx12.h +// Content: D3DX12 utility library +// +////////////////////////////////////////////////////////////////////////////// + +#ifndef __D3DX12_H__ +#define __D3DX12_H__ + +#include "d3d12.h" + +#if defined( __cplusplus ) + +struct CD3DX12_DEFAULT {}; +extern const DECLSPEC_SELECTANY CD3DX12_DEFAULT D3D12_DEFAULT; + +//------------------------------------------------------------------------------------------------ +inline bool operator==( const D3D12_VIEWPORT& l, const D3D12_VIEWPORT& r ) +{ + return l.TopLeftX == r.TopLeftX && l.TopLeftY == r.TopLeftY && l.Width == r.Width && + l.Height == r.Height && l.MinDepth == r.MinDepth && l.MaxDepth == r.MaxDepth; +} + +//------------------------------------------------------------------------------------------------ +inline bool operator!=( const D3D12_VIEWPORT& l, const D3D12_VIEWPORT& r ) +{ return !( l == r ); } + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_RECT : public D3D12_RECT +{ + CD3DX12_RECT() + {} + explicit CD3DX12_RECT( const D3D12_RECT& o ) : + D3D12_RECT( o ) + {} + explicit CD3DX12_RECT( + LONG Left, + LONG Top, + LONG Right, + LONG Bottom ) + { + left = Left; + top = Top; + right = Right; + bottom = Bottom; + } + ~CD3DX12_RECT() {} + operator const D3D12_RECT&() const { return *this; } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_BOX : public D3D12_BOX +{ + CD3DX12_BOX() + {} + explicit CD3DX12_BOX( const D3D12_BOX& o ) : + D3D12_BOX( o ) + {} + explicit CD3DX12_BOX( + LONG Left, + LONG Right ) + { + left = Left; + top = 0; + front = 0; + right = Right; + bottom = 1; + back = 1; + } + explicit CD3DX12_BOX( + LONG Left, + LONG Top, + LONG Right, + LONG Bottom ) + { + left = Left; + top = Top; + front = 0; + right = Right; + bottom = Bottom; + back = 1; + } + explicit CD3DX12_BOX( + LONG Left, + LONG Top, + LONG Front, + LONG Right, + LONG Bottom, + LONG Back ) + { + left = Left; + top = Top; + front = Front; + right = Right; + bottom = Bottom; + back = Back; + } + ~CD3DX12_BOX() {} + operator const D3D12_BOX&() const { return *this; } +}; +inline bool operator==( const D3D12_BOX& l, const D3D12_BOX& r ) +{ + return l.left == r.left && l.top == r.top && l.front == r.front && + l.right == r.right && l.bottom == r.bottom && l.back == r.back; +} +inline bool operator!=( const D3D12_BOX& l, const D3D12_BOX& r ) +{ return !( l == r ); } + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_DEPTH_STENCIL_DESC : public D3D12_DEPTH_STENCIL_DESC +{ + CD3DX12_DEPTH_STENCIL_DESC() + {} + explicit CD3DX12_DEPTH_STENCIL_DESC( const D3D12_DEPTH_STENCIL_DESC& o ) : + D3D12_DEPTH_STENCIL_DESC( o ) + {} + explicit CD3DX12_DEPTH_STENCIL_DESC( CD3DX12_DEFAULT ) + { + DepthEnable = TRUE; + DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ALL; + DepthFunc = D3D12_COMPARISON_FUNC_LESS; + StencilEnable = FALSE; + StencilReadMask = D3D12_DEFAULT_STENCIL_READ_MASK; + StencilWriteMask = D3D12_DEFAULT_STENCIL_WRITE_MASK; + const D3D12_DEPTH_STENCILOP_DESC defaultStencilOp = + { D3D12_STENCIL_OP_KEEP, D3D12_STENCIL_OP_KEEP, D3D12_STENCIL_OP_KEEP, D3D12_COMPARISON_FUNC_ALWAYS }; + FrontFace = defaultStencilOp; + BackFace = defaultStencilOp; + } + explicit CD3DX12_DEPTH_STENCIL_DESC( + BOOL depthEnable, + D3D12_DEPTH_WRITE_MASK depthWriteMask, + D3D12_COMPARISON_FUNC depthFunc, + BOOL stencilEnable, + UINT8 stencilReadMask, + UINT8 stencilWriteMask, + D3D12_STENCIL_OP frontStencilFailOp, + D3D12_STENCIL_OP frontStencilDepthFailOp, + D3D12_STENCIL_OP frontStencilPassOp, + D3D12_COMPARISON_FUNC frontStencilFunc, + D3D12_STENCIL_OP backStencilFailOp, + D3D12_STENCIL_OP backStencilDepthFailOp, + D3D12_STENCIL_OP backStencilPassOp, + D3D12_COMPARISON_FUNC backStencilFunc ) + { + DepthEnable = depthEnable; + DepthWriteMask = depthWriteMask; + DepthFunc = depthFunc; + StencilEnable = stencilEnable; + StencilReadMask = stencilReadMask; + StencilWriteMask = stencilWriteMask; + FrontFace.StencilFailOp = frontStencilFailOp; + FrontFace.StencilDepthFailOp = frontStencilDepthFailOp; + FrontFace.StencilPassOp = frontStencilPassOp; + FrontFace.StencilFunc = frontStencilFunc; + BackFace.StencilFailOp = backStencilFailOp; + BackFace.StencilDepthFailOp = backStencilDepthFailOp; + BackFace.StencilPassOp = backStencilPassOp; + BackFace.StencilFunc = backStencilFunc; + } + ~CD3DX12_DEPTH_STENCIL_DESC() {} + operator const D3D12_DEPTH_STENCIL_DESC&() const { return *this; } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_BLEND_DESC : public D3D12_BLEND_DESC +{ + CD3DX12_BLEND_DESC() + {} + explicit CD3DX12_BLEND_DESC( const D3D12_BLEND_DESC& o ) : + D3D12_BLEND_DESC( o ) + {} + explicit CD3DX12_BLEND_DESC( CD3DX12_DEFAULT ) + { + AlphaToCoverageEnable = FALSE; + IndependentBlendEnable = FALSE; + const D3D12_RENDER_TARGET_BLEND_DESC defaultRenderTargetBlendDesc = + { + FALSE,FALSE, + D3D12_BLEND_ONE, D3D12_BLEND_ZERO, D3D12_BLEND_OP_ADD, + D3D12_BLEND_ONE, D3D12_BLEND_ZERO, D3D12_BLEND_OP_ADD, + D3D12_LOGIC_OP_NOOP, + D3D12_COLOR_WRITE_ENABLE_ALL, + }; + for (UINT i = 0; i < D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT; ++i) + RenderTarget[ i ] = defaultRenderTargetBlendDesc; + } + ~CD3DX12_BLEND_DESC() {} + operator const D3D12_BLEND_DESC&() const { return *this; } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_RASTERIZER_DESC : public D3D12_RASTERIZER_DESC +{ + CD3DX12_RASTERIZER_DESC() + {} + explicit CD3DX12_RASTERIZER_DESC( const D3D12_RASTERIZER_DESC& o ) : + D3D12_RASTERIZER_DESC( o ) + {} + explicit CD3DX12_RASTERIZER_DESC( CD3DX12_DEFAULT ) + { + FillMode = D3D12_FILL_MODE_SOLID; + CullMode = D3D12_CULL_MODE_BACK; + FrontCounterClockwise = FALSE; + DepthBias = D3D12_DEFAULT_DEPTH_BIAS; + DepthBiasClamp = D3D12_DEFAULT_DEPTH_BIAS_CLAMP; + SlopeScaledDepthBias = D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS; + DepthClipEnable = TRUE; + MultisampleEnable = FALSE; + AntialiasedLineEnable = FALSE; + ForcedSampleCount = 0; + ConservativeRaster = D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF; + } + explicit CD3DX12_RASTERIZER_DESC( + D3D12_FILL_MODE fillMode, + D3D12_CULL_MODE cullMode, + BOOL frontCounterClockwise, + INT depthBias, + FLOAT depthBiasClamp, + FLOAT slopeScaledDepthBias, + BOOL depthClipEnable, + BOOL multisampleEnable, + BOOL antialiasedLineEnable, + UINT forcedSampleCount, + D3D12_CONSERVATIVE_RASTERIZATION_MODE conservativeRaster) + { + FillMode = fillMode; + CullMode = cullMode; + FrontCounterClockwise = frontCounterClockwise; + DepthBias = depthBias; + DepthBiasClamp = depthBiasClamp; + SlopeScaledDepthBias = slopeScaledDepthBias; + DepthClipEnable = depthClipEnable; + MultisampleEnable = multisampleEnable; + AntialiasedLineEnable = antialiasedLineEnable; + ForcedSampleCount = forcedSampleCount; + ConservativeRaster = conservativeRaster; + } + ~CD3DX12_RASTERIZER_DESC() {} + operator const D3D12_RASTERIZER_DESC&() const { return *this; } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_RESOURCE_ALLOCATION_INFO : public D3D12_RESOURCE_ALLOCATION_INFO +{ + CD3DX12_RESOURCE_ALLOCATION_INFO() + {} + explicit CD3DX12_RESOURCE_ALLOCATION_INFO( const D3D12_RESOURCE_ALLOCATION_INFO& o ) : + D3D12_RESOURCE_ALLOCATION_INFO( o ) + {} + CD3DX12_RESOURCE_ALLOCATION_INFO( + UINT64 size, + UINT64 alignment ) + { + SizeInBytes = size; + Alignment = alignment; + } + operator const D3D12_RESOURCE_ALLOCATION_INFO&() const { return *this; } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_HEAP_PROPERTIES : public D3D12_HEAP_PROPERTIES +{ + CD3DX12_HEAP_PROPERTIES() + {} + explicit CD3DX12_HEAP_PROPERTIES(const D3D12_HEAP_PROPERTIES &o) : + D3D12_HEAP_PROPERTIES(o) + {} + CD3DX12_HEAP_PROPERTIES( + D3D12_CPU_PAGE_PROPERTY cpuPageProperty, + D3D12_MEMORY_POOL memoryPoolPreference, + UINT creationNodeMask = 1, + UINT nodeMask = 1 ) + { + Type = D3D12_HEAP_TYPE_CUSTOM; + CPUPageProperty = cpuPageProperty; + MemoryPoolPreference = memoryPoolPreference; + CreationNodeMask = creationNodeMask; + VisibleNodeMask = nodeMask; + } + explicit CD3DX12_HEAP_PROPERTIES( + D3D12_HEAP_TYPE type, + UINT creationNodeMask = 1, + UINT nodeMask = 1 ) + { + Type = type; + CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; + MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; + CreationNodeMask = creationNodeMask; + VisibleNodeMask = nodeMask; + } + operator const D3D12_HEAP_PROPERTIES&() const { return *this; } + bool IsCPUAccessible() const + { + return Type == D3D12_HEAP_TYPE_UPLOAD || Type == D3D12_HEAP_TYPE_READBACK || (Type == D3D12_HEAP_TYPE_CUSTOM && + (CPUPageProperty == D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE || CPUPageProperty == D3D12_CPU_PAGE_PROPERTY_WRITE_BACK)); + } +}; +inline bool operator==( const D3D12_HEAP_PROPERTIES& l, const D3D12_HEAP_PROPERTIES& r ) +{ + return l.Type == r.Type && l.CPUPageProperty == r.CPUPageProperty && + l.MemoryPoolPreference == r.MemoryPoolPreference && + l.CreationNodeMask == r.CreationNodeMask && + l.VisibleNodeMask == r.VisibleNodeMask; +} +inline bool operator!=( const D3D12_HEAP_PROPERTIES& l, const D3D12_HEAP_PROPERTIES& r ) +{ return !( l == r ); } + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_HEAP_DESC : public D3D12_HEAP_DESC +{ + CD3DX12_HEAP_DESC() + {} + explicit CD3DX12_HEAP_DESC(const D3D12_HEAP_DESC &o) : + D3D12_HEAP_DESC(o) + {} + CD3DX12_HEAP_DESC( + UINT64 size, + D3D12_HEAP_PROPERTIES properties, + UINT64 alignment = 0, + D3D12_HEAP_FLAGS flags = D3D12_HEAP_FLAG_NONE ) + { + SizeInBytes = size; + Properties = properties; + Alignment = alignment; + Flags = flags; + } + CD3DX12_HEAP_DESC( + UINT64 size, + D3D12_HEAP_TYPE type, + UINT64 alignment = 0, + D3D12_HEAP_FLAGS flags = D3D12_HEAP_FLAG_NONE ) + { + SizeInBytes = size; + Properties = CD3DX12_HEAP_PROPERTIES( type ); + Alignment = alignment; + Flags = flags; + } + CD3DX12_HEAP_DESC( + UINT64 size, + D3D12_CPU_PAGE_PROPERTY cpuPageProperty, + D3D12_MEMORY_POOL memoryPoolPreference, + UINT64 alignment = 0, + D3D12_HEAP_FLAGS flags = D3D12_HEAP_FLAG_NONE ) + { + SizeInBytes = size; + Properties = CD3DX12_HEAP_PROPERTIES( cpuPageProperty, memoryPoolPreference ); + Alignment = alignment; + Flags = flags; + } + CD3DX12_HEAP_DESC( + const D3D12_RESOURCE_ALLOCATION_INFO& resAllocInfo, + D3D12_HEAP_PROPERTIES properties, + D3D12_HEAP_FLAGS flags = D3D12_HEAP_FLAG_NONE ) + { + SizeInBytes = resAllocInfo.SizeInBytes; + Properties = properties; + Alignment = resAllocInfo.Alignment; + Flags = flags; + } + CD3DX12_HEAP_DESC( + const D3D12_RESOURCE_ALLOCATION_INFO& resAllocInfo, + D3D12_HEAP_TYPE type, + D3D12_HEAP_FLAGS flags = D3D12_HEAP_FLAG_NONE ) + { + SizeInBytes = resAllocInfo.SizeInBytes; + Properties = CD3DX12_HEAP_PROPERTIES( type ); + Alignment = resAllocInfo.Alignment; + Flags = flags; + } + CD3DX12_HEAP_DESC( + const D3D12_RESOURCE_ALLOCATION_INFO& resAllocInfo, + D3D12_CPU_PAGE_PROPERTY cpuPageProperty, + D3D12_MEMORY_POOL memoryPoolPreference, + D3D12_HEAP_FLAGS flags = D3D12_HEAP_FLAG_NONE ) + { + SizeInBytes = resAllocInfo.SizeInBytes; + Properties = CD3DX12_HEAP_PROPERTIES( cpuPageProperty, memoryPoolPreference ); + Alignment = resAllocInfo.Alignment; + Flags = flags; + } + operator const D3D12_HEAP_DESC&() const { return *this; } + bool IsCPUAccessible() const + { return static_cast< const CD3DX12_HEAP_PROPERTIES* >( &Properties )->IsCPUAccessible(); } +}; +inline bool operator==( const D3D12_HEAP_DESC& l, const D3D12_HEAP_DESC& r ) +{ + return l.SizeInBytes == r.SizeInBytes && + l.Properties == r.Properties && + l.Alignment == r.Alignment && + l.Flags == r.Flags; +} +inline bool operator!=( const D3D12_HEAP_DESC& l, const D3D12_HEAP_DESC& r ) +{ return !( l == r ); } + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_CLEAR_VALUE : public D3D12_CLEAR_VALUE +{ + CD3DX12_CLEAR_VALUE() + {} + explicit CD3DX12_CLEAR_VALUE(const D3D12_CLEAR_VALUE &o) : + D3D12_CLEAR_VALUE(o) + {} + CD3DX12_CLEAR_VALUE( + DXGI_FORMAT format, + const FLOAT color[4] ) + { + Format = format; + memcpy( Color, color, sizeof( Color ) ); + } + CD3DX12_CLEAR_VALUE( + DXGI_FORMAT format, + FLOAT depth, + UINT8 stencil ) + { + Format = format; + /* Use memcpy to preserve NAN values */ + memcpy( &DepthStencil.Depth, &depth, sizeof( depth ) ); + DepthStencil.Stencil = stencil; + } + operator const D3D12_CLEAR_VALUE&() const { return *this; } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_RANGE : public D3D12_RANGE +{ + CD3DX12_RANGE() + {} + explicit CD3DX12_RANGE(const D3D12_RANGE &o) : + D3D12_RANGE(o) + {} + CD3DX12_RANGE( + SIZE_T begin, + SIZE_T end ) + { + Begin = begin; + End = end; + } + operator const D3D12_RANGE&() const { return *this; } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_SHADER_BYTECODE : public D3D12_SHADER_BYTECODE +{ + CD3DX12_SHADER_BYTECODE() + {} + explicit CD3DX12_SHADER_BYTECODE(const D3D12_SHADER_BYTECODE &o) : + D3D12_SHADER_BYTECODE(o) + {} + CD3DX12_SHADER_BYTECODE( + ID3DBlob* pShaderBlob ) + { + pShaderBytecode = pShaderBlob->GetBufferPointer(); + BytecodeLength = pShaderBlob->GetBufferSize(); + } + CD3DX12_SHADER_BYTECODE( + void* _pShaderBytecode, + SIZE_T bytecodeLength ) + { + pShaderBytecode = _pShaderBytecode; + BytecodeLength = bytecodeLength; + } + operator const D3D12_SHADER_BYTECODE&() const { return *this; } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_TILED_RESOURCE_COORDINATE : public D3D12_TILED_RESOURCE_COORDINATE +{ + CD3DX12_TILED_RESOURCE_COORDINATE() + {} + explicit CD3DX12_TILED_RESOURCE_COORDINATE(const D3D12_TILED_RESOURCE_COORDINATE &o) : + D3D12_TILED_RESOURCE_COORDINATE(o) + {} + CD3DX12_TILED_RESOURCE_COORDINATE( + UINT x, + UINT y, + UINT z, + UINT subresource ) + { + X = x; + Y = y; + Z = z; + Subresource = subresource; + } + operator const D3D12_TILED_RESOURCE_COORDINATE&() const { return *this; } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_TILE_REGION_SIZE : public D3D12_TILE_REGION_SIZE +{ + CD3DX12_TILE_REGION_SIZE() + {} + explicit CD3DX12_TILE_REGION_SIZE(const D3D12_TILE_REGION_SIZE &o) : + D3D12_TILE_REGION_SIZE(o) + {} + CD3DX12_TILE_REGION_SIZE( + UINT numTiles, + BOOL useBox, + UINT width, + UINT16 height, + UINT16 depth ) + { + NumTiles = numTiles; + UseBox = useBox; + Width = width; + Height = height; + Depth = depth; + } + operator const D3D12_TILE_REGION_SIZE&() const { return *this; } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_SUBRESOURCE_TILING : public D3D12_SUBRESOURCE_TILING +{ + CD3DX12_SUBRESOURCE_TILING() + {} + explicit CD3DX12_SUBRESOURCE_TILING(const D3D12_SUBRESOURCE_TILING &o) : + D3D12_SUBRESOURCE_TILING(o) + {} + CD3DX12_SUBRESOURCE_TILING( + UINT widthInTiles, + UINT16 heightInTiles, + UINT16 depthInTiles, + UINT startTileIndexInOverallResource ) + { + WidthInTiles = widthInTiles; + HeightInTiles = heightInTiles; + DepthInTiles = depthInTiles; + StartTileIndexInOverallResource = startTileIndexInOverallResource; + } + operator const D3D12_SUBRESOURCE_TILING&() const { return *this; } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_TILE_SHAPE : public D3D12_TILE_SHAPE +{ + CD3DX12_TILE_SHAPE() + {} + explicit CD3DX12_TILE_SHAPE(const D3D12_TILE_SHAPE &o) : + D3D12_TILE_SHAPE(o) + {} + CD3DX12_TILE_SHAPE( + UINT widthInTexels, + UINT heightInTexels, + UINT depthInTexels ) + { + WidthInTexels = widthInTexels; + HeightInTexels = heightInTexels; + DepthInTexels = depthInTexels; + } + operator const D3D12_TILE_SHAPE&() const { return *this; } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_RESOURCE_BARRIER : public D3D12_RESOURCE_BARRIER +{ + CD3DX12_RESOURCE_BARRIER() + {} + explicit CD3DX12_RESOURCE_BARRIER(const D3D12_RESOURCE_BARRIER &o) : + D3D12_RESOURCE_BARRIER(o) + {} + static inline CD3DX12_RESOURCE_BARRIER Transition( + _In_ ID3D12Resource* pResource, + D3D12_RESOURCE_STATES stateBefore, + D3D12_RESOURCE_STATES stateAfter, + UINT subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, + D3D12_RESOURCE_BARRIER_FLAGS flags = D3D12_RESOURCE_BARRIER_FLAG_NONE) + { + CD3DX12_RESOURCE_BARRIER result; + ZeroMemory(&result, sizeof(result)); + D3D12_RESOURCE_BARRIER &barrier = result; + result.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + result.Flags = flags; + barrier.Transition.pResource = pResource; + barrier.Transition.StateBefore = stateBefore; + barrier.Transition.StateAfter = stateAfter; + barrier.Transition.Subresource = subresource; + return result; + } + static inline CD3DX12_RESOURCE_BARRIER Aliasing( + _In_ ID3D12Resource* pResourceBefore, + _In_ ID3D12Resource* pResourceAfter) + { + CD3DX12_RESOURCE_BARRIER result; + ZeroMemory(&result, sizeof(result)); + D3D12_RESOURCE_BARRIER &barrier = result; + result.Type = D3D12_RESOURCE_BARRIER_TYPE_ALIASING; + barrier.Aliasing.pResourceBefore = pResourceBefore; + barrier.Aliasing.pResourceAfter = pResourceAfter; + return result; + } + static inline CD3DX12_RESOURCE_BARRIER UAV( + _In_ ID3D12Resource* pResource) + { + CD3DX12_RESOURCE_BARRIER result; + ZeroMemory(&result, sizeof(result)); + D3D12_RESOURCE_BARRIER &barrier = result; + result.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV; + barrier.UAV.pResource = pResource; + return result; + } + operator const D3D12_RESOURCE_BARRIER&() const { return *this; } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_PACKED_MIP_INFO : public D3D12_PACKED_MIP_INFO +{ + CD3DX12_PACKED_MIP_INFO() + {} + explicit CD3DX12_PACKED_MIP_INFO(const D3D12_PACKED_MIP_INFO &o) : + D3D12_PACKED_MIP_INFO(o) + {} + CD3DX12_PACKED_MIP_INFO( + UINT8 numStandardMips, + UINT8 numPackedMips, + UINT numTilesForPackedMips, + UINT startTileIndexInOverallResource ) + { + NumStandardMips = numStandardMips; + NumPackedMips = numPackedMips; + NumTilesForPackedMips = numTilesForPackedMips; + StartTileIndexInOverallResource = startTileIndexInOverallResource; + } + operator const D3D12_PACKED_MIP_INFO&() const { return *this; } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_SUBRESOURCE_FOOTPRINT : public D3D12_SUBRESOURCE_FOOTPRINT +{ + CD3DX12_SUBRESOURCE_FOOTPRINT() + {} + explicit CD3DX12_SUBRESOURCE_FOOTPRINT(const D3D12_SUBRESOURCE_FOOTPRINT &o) : + D3D12_SUBRESOURCE_FOOTPRINT(o) + {} + CD3DX12_SUBRESOURCE_FOOTPRINT( + DXGI_FORMAT format, + UINT width, + UINT height, + UINT depth, + UINT rowPitch ) + { + Format = format; + Width = width; + Height = height; + Depth = depth; + RowPitch = rowPitch; + } + explicit CD3DX12_SUBRESOURCE_FOOTPRINT( + const D3D12_RESOURCE_DESC& resDesc, + UINT rowPitch ) + { + Format = resDesc.Format; + Width = UINT( resDesc.Width ); + Height = resDesc.Height; + Depth = (resDesc.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D ? resDesc.DepthOrArraySize : 1); + RowPitch = rowPitch; + } + operator const D3D12_SUBRESOURCE_FOOTPRINT&() const { return *this; } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_TEXTURE_COPY_LOCATION : public D3D12_TEXTURE_COPY_LOCATION +{ + CD3DX12_TEXTURE_COPY_LOCATION() + {} + explicit CD3DX12_TEXTURE_COPY_LOCATION(const D3D12_TEXTURE_COPY_LOCATION &o) : + D3D12_TEXTURE_COPY_LOCATION(o) + {} + CD3DX12_TEXTURE_COPY_LOCATION(ID3D12Resource* pRes) { pResource = pRes; } + CD3DX12_TEXTURE_COPY_LOCATION(ID3D12Resource* pRes, D3D12_PLACED_SUBRESOURCE_FOOTPRINT const& Footprint) + { + pResource = pRes; + Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + PlacedFootprint = Footprint; + } + CD3DX12_TEXTURE_COPY_LOCATION(ID3D12Resource* pRes, UINT Sub) + { + pResource = pRes; + Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + SubresourceIndex = Sub; + } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_DESCRIPTOR_RANGE : public D3D12_DESCRIPTOR_RANGE +{ + CD3DX12_DESCRIPTOR_RANGE() { } + explicit CD3DX12_DESCRIPTOR_RANGE(const D3D12_DESCRIPTOR_RANGE &o) : + D3D12_DESCRIPTOR_RANGE(o) + {} + CD3DX12_DESCRIPTOR_RANGE( + D3D12_DESCRIPTOR_RANGE_TYPE rangeType, + UINT numDescriptors, + UINT baseShaderRegister, + UINT registerSpace = 0, + UINT offsetInDescriptorsFromTableStart = + D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND) + { + Init(rangeType, numDescriptors, baseShaderRegister, registerSpace, offsetInDescriptorsFromTableStart); + } + + inline void Init( + D3D12_DESCRIPTOR_RANGE_TYPE rangeType, + UINT numDescriptors, + UINT baseShaderRegister, + UINT registerSpace = 0, + UINT offsetInDescriptorsFromTableStart = + D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND) + { + Init(*this, rangeType, numDescriptors, baseShaderRegister, registerSpace, offsetInDescriptorsFromTableStart); + } + + static inline void Init( + _Out_ D3D12_DESCRIPTOR_RANGE &range, + D3D12_DESCRIPTOR_RANGE_TYPE rangeType, + UINT numDescriptors, + UINT baseShaderRegister, + UINT registerSpace = 0, + UINT offsetInDescriptorsFromTableStart = + D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND) + { + range.RangeType = rangeType; + range.NumDescriptors = numDescriptors; + range.BaseShaderRegister = baseShaderRegister; + range.RegisterSpace = registerSpace; + range.OffsetInDescriptorsFromTableStart = offsetInDescriptorsFromTableStart; + } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_ROOT_DESCRIPTOR_TABLE : public D3D12_ROOT_DESCRIPTOR_TABLE +{ + CD3DX12_ROOT_DESCRIPTOR_TABLE() {} + explicit CD3DX12_ROOT_DESCRIPTOR_TABLE(const D3D12_ROOT_DESCRIPTOR_TABLE &o) : + D3D12_ROOT_DESCRIPTOR_TABLE(o) + {} + CD3DX12_ROOT_DESCRIPTOR_TABLE( + UINT numDescriptorRanges, + _In_reads_opt_(numDescriptorRanges) const D3D12_DESCRIPTOR_RANGE* _pDescriptorRanges) + { + Init(numDescriptorRanges, _pDescriptorRanges); + } + + inline void Init( + UINT numDescriptorRanges, + _In_reads_opt_(numDescriptorRanges) const D3D12_DESCRIPTOR_RANGE* _pDescriptorRanges) + { + Init(*this, numDescriptorRanges, _pDescriptorRanges); + } + + static inline void Init( + _Out_ D3D12_ROOT_DESCRIPTOR_TABLE &rootDescriptorTable, + UINT numDescriptorRanges, + _In_reads_opt_(numDescriptorRanges) const D3D12_DESCRIPTOR_RANGE* _pDescriptorRanges) + { + rootDescriptorTable.NumDescriptorRanges = numDescriptorRanges; + rootDescriptorTable.pDescriptorRanges = _pDescriptorRanges; + } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_ROOT_CONSTANTS : public D3D12_ROOT_CONSTANTS +{ + CD3DX12_ROOT_CONSTANTS() {} + explicit CD3DX12_ROOT_CONSTANTS(const D3D12_ROOT_CONSTANTS &o) : + D3D12_ROOT_CONSTANTS(o) + {} + CD3DX12_ROOT_CONSTANTS( + UINT num32BitValues, + UINT shaderRegister, + UINT registerSpace = 0) + { + Init(num32BitValues, shaderRegister, registerSpace); + } + + inline void Init( + UINT num32BitValues, + UINT shaderRegister, + UINT registerSpace = 0) + { + Init(*this, num32BitValues, shaderRegister, registerSpace); + } + + static inline void Init( + _Out_ D3D12_ROOT_CONSTANTS &rootConstants, + UINT num32BitValues, + UINT shaderRegister, + UINT registerSpace = 0) + { + rootConstants.Num32BitValues = num32BitValues; + rootConstants.ShaderRegister = shaderRegister; + rootConstants.RegisterSpace = registerSpace; + } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_ROOT_DESCRIPTOR : public D3D12_ROOT_DESCRIPTOR +{ + CD3DX12_ROOT_DESCRIPTOR() {} + explicit CD3DX12_ROOT_DESCRIPTOR(const D3D12_ROOT_DESCRIPTOR &o) : + D3D12_ROOT_DESCRIPTOR(o) + {} + CD3DX12_ROOT_DESCRIPTOR( + UINT shaderRegister, + UINT registerSpace = 0) + { + Init(shaderRegister, registerSpace); + } + + inline void Init( + UINT shaderRegister, + UINT registerSpace = 0) + { + Init(*this, shaderRegister, registerSpace); + } + + static inline void Init(_Out_ D3D12_ROOT_DESCRIPTOR &table, UINT shaderRegister, UINT registerSpace = 0) + { + table.ShaderRegister = shaderRegister; + table.RegisterSpace = registerSpace; + } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_ROOT_PARAMETER : public D3D12_ROOT_PARAMETER +{ + CD3DX12_ROOT_PARAMETER() {} + explicit CD3DX12_ROOT_PARAMETER(const D3D12_ROOT_PARAMETER &o) : + D3D12_ROOT_PARAMETER(o) + {} + + static inline void InitAsDescriptorTable( + _Out_ D3D12_ROOT_PARAMETER &rootParam, + UINT numDescriptorRanges, + _In_reads_(numDescriptorRanges) const D3D12_DESCRIPTOR_RANGE* pDescriptorRanges, + D3D12_SHADER_VISIBILITY visibility = D3D12_SHADER_VISIBILITY_ALL) + { + rootParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + rootParam.ShaderVisibility = visibility; + CD3DX12_ROOT_DESCRIPTOR_TABLE::Init(rootParam.DescriptorTable, numDescriptorRanges, pDescriptorRanges); + } + + static inline void InitAsConstants( + _Out_ D3D12_ROOT_PARAMETER &rootParam, + UINT num32BitValues, + UINT shaderRegister, + UINT registerSpace = 0, + D3D12_SHADER_VISIBILITY visibility = D3D12_SHADER_VISIBILITY_ALL) + { + rootParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS; + rootParam.ShaderVisibility = visibility; + CD3DX12_ROOT_CONSTANTS::Init(rootParam.Constants, num32BitValues, shaderRegister, registerSpace); + } + + static inline void InitAsConstantBufferView( + _Out_ D3D12_ROOT_PARAMETER &rootParam, + UINT shaderRegister, + UINT registerSpace = 0, + D3D12_SHADER_VISIBILITY visibility = D3D12_SHADER_VISIBILITY_ALL) + { + rootParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV; + rootParam.ShaderVisibility = visibility; + CD3DX12_ROOT_DESCRIPTOR::Init(rootParam.Descriptor, shaderRegister, registerSpace); + } + + static inline void InitAsShaderResourceView( + _Out_ D3D12_ROOT_PARAMETER &rootParam, + UINT shaderRegister, + UINT registerSpace = 0, + D3D12_SHADER_VISIBILITY visibility = D3D12_SHADER_VISIBILITY_ALL) + { + rootParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_SRV; + rootParam.ShaderVisibility = visibility; + CD3DX12_ROOT_DESCRIPTOR::Init(rootParam.Descriptor, shaderRegister, registerSpace); + } + + static inline void InitAsUnorderedAccessView( + _Out_ D3D12_ROOT_PARAMETER &rootParam, + UINT shaderRegister, + UINT registerSpace = 0, + D3D12_SHADER_VISIBILITY visibility = D3D12_SHADER_VISIBILITY_ALL) + { + rootParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_UAV; + rootParam.ShaderVisibility = visibility; + CD3DX12_ROOT_DESCRIPTOR::Init(rootParam.Descriptor, shaderRegister, registerSpace); + } + + inline void InitAsDescriptorTable( + UINT numDescriptorRanges, + _In_reads_(numDescriptorRanges) const D3D12_DESCRIPTOR_RANGE* pDescriptorRanges, + D3D12_SHADER_VISIBILITY visibility = D3D12_SHADER_VISIBILITY_ALL) + { + InitAsDescriptorTable(*this, numDescriptorRanges, pDescriptorRanges, visibility); + } + + inline void InitAsConstants( + UINT num32BitValues, + UINT shaderRegister, + UINT registerSpace = 0, + D3D12_SHADER_VISIBILITY visibility = D3D12_SHADER_VISIBILITY_ALL) + { + InitAsConstants(*this, num32BitValues, shaderRegister, registerSpace, visibility); + } + + inline void InitAsConstantBufferView( + UINT shaderRegister, + UINT registerSpace = 0, + D3D12_SHADER_VISIBILITY visibility = D3D12_SHADER_VISIBILITY_ALL) + { + InitAsConstantBufferView(*this, shaderRegister, registerSpace, visibility); + } + + inline void InitAsShaderResourceView( + UINT shaderRegister, + UINT registerSpace = 0, + D3D12_SHADER_VISIBILITY visibility = D3D12_SHADER_VISIBILITY_ALL) + { + InitAsShaderResourceView(*this, shaderRegister, registerSpace, visibility); + } + + inline void InitAsUnorderedAccessView( + UINT shaderRegister, + UINT registerSpace = 0, + D3D12_SHADER_VISIBILITY visibility = D3D12_SHADER_VISIBILITY_ALL) + { + InitAsUnorderedAccessView(*this, shaderRegister, registerSpace, visibility); + } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_STATIC_SAMPLER_DESC : public D3D12_STATIC_SAMPLER_DESC +{ + CD3DX12_STATIC_SAMPLER_DESC() {} + explicit CD3DX12_STATIC_SAMPLER_DESC(const D3D12_STATIC_SAMPLER_DESC &o) : + D3D12_STATIC_SAMPLER_DESC(o) + {} + CD3DX12_STATIC_SAMPLER_DESC( + UINT shaderRegister, + D3D12_FILTER filter = D3D12_FILTER_ANISOTROPIC, + D3D12_TEXTURE_ADDRESS_MODE addressU = D3D12_TEXTURE_ADDRESS_MODE_WRAP, + D3D12_TEXTURE_ADDRESS_MODE addressV = D3D12_TEXTURE_ADDRESS_MODE_WRAP, + D3D12_TEXTURE_ADDRESS_MODE addressW = D3D12_TEXTURE_ADDRESS_MODE_WRAP, + FLOAT mipLODBias = 0, + UINT maxAnisotropy = 16, + D3D12_COMPARISON_FUNC comparisonFunc = D3D12_COMPARISON_FUNC_LESS_EQUAL, + D3D12_STATIC_BORDER_COLOR borderColor = D3D12_STATIC_BORDER_COLOR_OPAQUE_WHITE, + FLOAT minLOD = 0.f, + FLOAT maxLOD = D3D12_FLOAT32_MAX, + D3D12_SHADER_VISIBILITY shaderVisibility = D3D12_SHADER_VISIBILITY_ALL, + UINT registerSpace = 0) + { + Init( + shaderRegister, + filter, + addressU, + addressV, + addressW, + mipLODBias, + maxAnisotropy, + comparisonFunc, + borderColor, + minLOD, + maxLOD, + shaderVisibility, + registerSpace); + } + + static inline void Init( + _Out_ D3D12_STATIC_SAMPLER_DESC &samplerDesc, + UINT shaderRegister, + D3D12_FILTER filter = D3D12_FILTER_ANISOTROPIC, + D3D12_TEXTURE_ADDRESS_MODE addressU = D3D12_TEXTURE_ADDRESS_MODE_WRAP, + D3D12_TEXTURE_ADDRESS_MODE addressV = D3D12_TEXTURE_ADDRESS_MODE_WRAP, + D3D12_TEXTURE_ADDRESS_MODE addressW = D3D12_TEXTURE_ADDRESS_MODE_WRAP, + FLOAT mipLODBias = 0, + UINT maxAnisotropy = 16, + D3D12_COMPARISON_FUNC comparisonFunc = D3D12_COMPARISON_FUNC_LESS_EQUAL, + D3D12_STATIC_BORDER_COLOR borderColor = D3D12_STATIC_BORDER_COLOR_OPAQUE_WHITE, + FLOAT minLOD = 0.f, + FLOAT maxLOD = D3D12_FLOAT32_MAX, + D3D12_SHADER_VISIBILITY shaderVisibility = D3D12_SHADER_VISIBILITY_ALL, + UINT registerSpace = 0) + { + samplerDesc.ShaderRegister = shaderRegister; + samplerDesc.Filter = filter; + samplerDesc.AddressU = addressU; + samplerDesc.AddressV = addressV; + samplerDesc.AddressW = addressW; + samplerDesc.MipLODBias = mipLODBias; + samplerDesc.MaxAnisotropy = maxAnisotropy; + samplerDesc.ComparisonFunc = comparisonFunc; + samplerDesc.BorderColor = borderColor; + samplerDesc.MinLOD = minLOD; + samplerDesc.MaxLOD = maxLOD; + samplerDesc.ShaderVisibility = shaderVisibility; + samplerDesc.RegisterSpace = registerSpace; + } + inline void Init( + UINT shaderRegister, + D3D12_FILTER filter = D3D12_FILTER_ANISOTROPIC, + D3D12_TEXTURE_ADDRESS_MODE addressU = D3D12_TEXTURE_ADDRESS_MODE_WRAP, + D3D12_TEXTURE_ADDRESS_MODE addressV = D3D12_TEXTURE_ADDRESS_MODE_WRAP, + D3D12_TEXTURE_ADDRESS_MODE addressW = D3D12_TEXTURE_ADDRESS_MODE_WRAP, + FLOAT mipLODBias = 0, + UINT maxAnisotropy = 16, + D3D12_COMPARISON_FUNC comparisonFunc = D3D12_COMPARISON_FUNC_LESS_EQUAL, + D3D12_STATIC_BORDER_COLOR borderColor = D3D12_STATIC_BORDER_COLOR_OPAQUE_WHITE, + FLOAT minLOD = 0.f, + FLOAT maxLOD = D3D12_FLOAT32_MAX, + D3D12_SHADER_VISIBILITY shaderVisibility = D3D12_SHADER_VISIBILITY_ALL, + UINT registerSpace = 0) + { + Init( + *this, + shaderRegister, + filter, + addressU, + addressV, + addressW, + mipLODBias, + maxAnisotropy, + comparisonFunc, + borderColor, + minLOD, + maxLOD, + shaderVisibility, + registerSpace); + } + +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_ROOT_SIGNATURE_DESC : public D3D12_ROOT_SIGNATURE_DESC +{ + CD3DX12_ROOT_SIGNATURE_DESC() {} + explicit CD3DX12_ROOT_SIGNATURE_DESC(const D3D12_ROOT_SIGNATURE_DESC &o) : + D3D12_ROOT_SIGNATURE_DESC(o) + {} + CD3DX12_ROOT_SIGNATURE_DESC( + UINT numParameters, + _In_reads_opt_(numParameters) const D3D12_ROOT_PARAMETER* _pParameters, + UINT numStaticSamplers = 0, + _In_reads_opt_(numStaticSamplers) const D3D12_STATIC_SAMPLER_DESC* _pStaticSamplers = NULL, + D3D12_ROOT_SIGNATURE_FLAGS flags = D3D12_ROOT_SIGNATURE_FLAG_NONE) + { + Init(numParameters, _pParameters, numStaticSamplers, _pStaticSamplers, flags); + } + CD3DX12_ROOT_SIGNATURE_DESC(CD3DX12_DEFAULT) + { + Init(0, NULL, 0, NULL, D3D12_ROOT_SIGNATURE_FLAG_NONE); + } + + inline void Init( + UINT numParameters, + _In_reads_opt_(numParameters) const D3D12_ROOT_PARAMETER* _pParameters, + UINT numStaticSamplers = 0, + _In_reads_opt_(numStaticSamplers) const D3D12_STATIC_SAMPLER_DESC* _pStaticSamplers = NULL, + D3D12_ROOT_SIGNATURE_FLAGS flags = D3D12_ROOT_SIGNATURE_FLAG_NONE) + { + Init(*this, numParameters, _pParameters, numStaticSamplers, _pStaticSamplers, flags); + } + + static inline void Init( + _Out_ D3D12_ROOT_SIGNATURE_DESC &desc, + UINT numParameters, + _In_reads_opt_(numParameters) const D3D12_ROOT_PARAMETER* _pParameters, + UINT numStaticSamplers = 0, + _In_reads_opt_(numStaticSamplers) const D3D12_STATIC_SAMPLER_DESC* _pStaticSamplers = NULL, + D3D12_ROOT_SIGNATURE_FLAGS flags = D3D12_ROOT_SIGNATURE_FLAG_NONE) + { + desc.NumParameters = numParameters; + desc.pParameters = _pParameters; + desc.NumStaticSamplers = numStaticSamplers; + desc.pStaticSamplers = _pStaticSamplers; + desc.Flags = flags; + } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_CPU_DESCRIPTOR_HANDLE : public D3D12_CPU_DESCRIPTOR_HANDLE +{ + CD3DX12_CPU_DESCRIPTOR_HANDLE() {} + explicit CD3DX12_CPU_DESCRIPTOR_HANDLE(const D3D12_CPU_DESCRIPTOR_HANDLE &o) : + D3D12_CPU_DESCRIPTOR_HANDLE(o) + {} + CD3DX12_CPU_DESCRIPTOR_HANDLE(CD3DX12_DEFAULT) { ptr = 0; } + CD3DX12_CPU_DESCRIPTOR_HANDLE(_In_ const D3D12_CPU_DESCRIPTOR_HANDLE &other, INT offsetScaledByIncrementSize) + { + InitOffsetted(other, offsetScaledByIncrementSize); + } + CD3DX12_CPU_DESCRIPTOR_HANDLE(_In_ const D3D12_CPU_DESCRIPTOR_HANDLE &other, INT offsetInDescriptors, UINT descriptorIncrementSize) + { + InitOffsetted(other, offsetInDescriptors, descriptorIncrementSize); + } + CD3DX12_CPU_DESCRIPTOR_HANDLE& Offset(INT offsetInDescriptors, UINT descriptorIncrementSize) + { + ptr += offsetInDescriptors * descriptorIncrementSize; + return *this; + } + CD3DX12_CPU_DESCRIPTOR_HANDLE& Offset(INT offsetScaledByIncrementSize) + { + ptr += offsetScaledByIncrementSize; + return *this; + } + bool operator==(_In_ const D3D12_CPU_DESCRIPTOR_HANDLE& other) + { + return (ptr == other.ptr); + } + bool operator!=(_In_ const D3D12_CPU_DESCRIPTOR_HANDLE& other) + { + return (ptr != other.ptr); + } + CD3DX12_CPU_DESCRIPTOR_HANDLE &operator=(const D3D12_CPU_DESCRIPTOR_HANDLE &other) + { + ptr = other.ptr; + return *this; + } + + inline void InitOffsetted(_In_ const D3D12_CPU_DESCRIPTOR_HANDLE &base, INT offsetScaledByIncrementSize) + { + InitOffsetted(*this, base, offsetScaledByIncrementSize); + } + + inline void InitOffsetted(_In_ const D3D12_CPU_DESCRIPTOR_HANDLE &base, INT offsetInDescriptors, UINT descriptorIncrementSize) + { + InitOffsetted(*this, base, offsetInDescriptors, descriptorIncrementSize); + } + + static inline void InitOffsetted(_Out_ D3D12_CPU_DESCRIPTOR_HANDLE &handle, _In_ const D3D12_CPU_DESCRIPTOR_HANDLE &base, INT offsetScaledByIncrementSize) + { + handle.ptr = base.ptr + offsetScaledByIncrementSize; + } + + static inline void InitOffsetted(_Out_ D3D12_CPU_DESCRIPTOR_HANDLE &handle, _In_ const D3D12_CPU_DESCRIPTOR_HANDLE &base, INT offsetInDescriptors, UINT descriptorIncrementSize) + { + handle.ptr = base.ptr + offsetInDescriptors * descriptorIncrementSize; + } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_GPU_DESCRIPTOR_HANDLE : public D3D12_GPU_DESCRIPTOR_HANDLE +{ + CD3DX12_GPU_DESCRIPTOR_HANDLE() {} + explicit CD3DX12_GPU_DESCRIPTOR_HANDLE(const D3D12_GPU_DESCRIPTOR_HANDLE &o) : + D3D12_GPU_DESCRIPTOR_HANDLE(o) + {} + CD3DX12_GPU_DESCRIPTOR_HANDLE(CD3DX12_DEFAULT) { ptr = 0; } + CD3DX12_GPU_DESCRIPTOR_HANDLE(_In_ const D3D12_GPU_DESCRIPTOR_HANDLE &other, INT offsetScaledByIncrementSize) + { + InitOffsetted(other, offsetScaledByIncrementSize); + } + CD3DX12_GPU_DESCRIPTOR_HANDLE(_In_ const D3D12_GPU_DESCRIPTOR_HANDLE &other, INT offsetInDescriptors, UINT descriptorIncrementSize) + { + InitOffsetted(other, offsetInDescriptors, descriptorIncrementSize); + } + CD3DX12_GPU_DESCRIPTOR_HANDLE& Offset(INT offsetInDescriptors, UINT descriptorIncrementSize) + { + ptr += offsetInDescriptors * descriptorIncrementSize; + return *this; + } + CD3DX12_GPU_DESCRIPTOR_HANDLE& Offset(INT offsetScaledByIncrementSize) + { + ptr += offsetScaledByIncrementSize; + return *this; + } + inline bool operator==(_In_ const D3D12_GPU_DESCRIPTOR_HANDLE& other) + { + return (ptr == other.ptr); + } + inline bool operator!=(_In_ const D3D12_GPU_DESCRIPTOR_HANDLE& other) + { + return (ptr != other.ptr); + } + CD3DX12_GPU_DESCRIPTOR_HANDLE &operator=(const D3D12_GPU_DESCRIPTOR_HANDLE &other) + { + ptr = other.ptr; + return *this; + } + + inline void InitOffsetted(_In_ const D3D12_GPU_DESCRIPTOR_HANDLE &base, INT offsetScaledByIncrementSize) + { + InitOffsetted(*this, base, offsetScaledByIncrementSize); + } + + inline void InitOffsetted(_In_ const D3D12_GPU_DESCRIPTOR_HANDLE &base, INT offsetInDescriptors, UINT descriptorIncrementSize) + { + InitOffsetted(*this, base, offsetInDescriptors, descriptorIncrementSize); + } + + static inline void InitOffsetted(_Out_ D3D12_GPU_DESCRIPTOR_HANDLE &handle, _In_ const D3D12_GPU_DESCRIPTOR_HANDLE &base, INT offsetScaledByIncrementSize) + { + handle.ptr = base.ptr + offsetScaledByIncrementSize; + } + + static inline void InitOffsetted(_Out_ D3D12_GPU_DESCRIPTOR_HANDLE &handle, _In_ const D3D12_GPU_DESCRIPTOR_HANDLE &base, INT offsetInDescriptors, UINT descriptorIncrementSize) + { + handle.ptr = base.ptr + offsetInDescriptors * descriptorIncrementSize; + } +}; + +//------------------------------------------------------------------------------------------------ +inline UINT D3D12CalcSubresource( UINT MipSlice, UINT ArraySlice, UINT PlaneSlice, UINT MipLevels, UINT ArraySize ) +{ + return MipSlice + ArraySlice * MipLevels + PlaneSlice * MipLevels * ArraySize; +} + +//------------------------------------------------------------------------------------------------ +template +inline void D3D12DecomposeSubresource( UINT Subresource, UINT MipLevels, UINT ArraySize, _Out_ T& MipSlice, _Out_ U& ArraySlice, _Out_ V& PlaneSlice ) +{ + MipSlice = static_cast(Subresource % MipLevels); + ArraySlice = static_cast((Subresource / MipLevels) % ArraySize); + PlaneSlice = static_cast(Subresource / (MipLevels * ArraySize)); +} + +//------------------------------------------------------------------------------------------------ +inline UINT8 D3D12GetFormatPlaneCount( + _In_ ID3D12Device* pDevice, + DXGI_FORMAT Format + ) +{ + D3D12_FEATURE_DATA_FORMAT_INFO formatInfo = {Format}; + if (FAILED(pDevice->CheckFeatureSupport(D3D12_FEATURE_FORMAT_INFO, &formatInfo, sizeof(formatInfo)))) + { + return 0; + } + return formatInfo.PlaneCount; +} + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_RESOURCE_DESC : public D3D12_RESOURCE_DESC +{ + CD3DX12_RESOURCE_DESC() + {} + explicit CD3DX12_RESOURCE_DESC( const D3D12_RESOURCE_DESC& o ) : + D3D12_RESOURCE_DESC( o ) + {} + CD3DX12_RESOURCE_DESC( + D3D12_RESOURCE_DIMENSION dimension, + UINT64 alignment, + UINT64 width, + UINT height, + UINT16 depthOrArraySize, + UINT16 mipLevels, + DXGI_FORMAT format, + UINT sampleCount, + UINT sampleQuality, + D3D12_TEXTURE_LAYOUT layout, + D3D12_RESOURCE_FLAGS flags ) + { + Dimension = dimension; + Alignment = alignment; + Width = width; + Height = height; + DepthOrArraySize = depthOrArraySize; + MipLevels = mipLevels; + Format = format; + SampleDesc.Count = sampleCount; + SampleDesc.Quality = sampleQuality; + Layout = layout; + Flags = flags; + } + static inline CD3DX12_RESOURCE_DESC Buffer( + const D3D12_RESOURCE_ALLOCATION_INFO& resAllocInfo, + D3D12_RESOURCE_FLAGS flags = D3D12_RESOURCE_FLAG_NONE ) + { + return CD3DX12_RESOURCE_DESC( D3D12_RESOURCE_DIMENSION_BUFFER, resAllocInfo.Alignment, resAllocInfo.SizeInBytes, + 1, 1, 1, DXGI_FORMAT_UNKNOWN, 1, 0, D3D12_TEXTURE_LAYOUT_ROW_MAJOR, flags ); + } + static inline CD3DX12_RESOURCE_DESC Buffer( + UINT64 width, + D3D12_RESOURCE_FLAGS flags = D3D12_RESOURCE_FLAG_NONE, + UINT64 alignment = 0 ) + { + return CD3DX12_RESOURCE_DESC( D3D12_RESOURCE_DIMENSION_BUFFER, alignment, width, 1, 1, 1, + DXGI_FORMAT_UNKNOWN, 1, 0, D3D12_TEXTURE_LAYOUT_ROW_MAJOR, flags ); + } + static inline CD3DX12_RESOURCE_DESC Tex1D( + DXGI_FORMAT format, + UINT64 width, + UINT16 arraySize = 1, + UINT16 mipLevels = 0, + D3D12_RESOURCE_FLAGS flags = D3D12_RESOURCE_FLAG_NONE, + D3D12_TEXTURE_LAYOUT layout = D3D12_TEXTURE_LAYOUT_UNKNOWN, + UINT64 alignment = 0 ) + { + return CD3DX12_RESOURCE_DESC( D3D12_RESOURCE_DIMENSION_TEXTURE1D, alignment, width, 1, arraySize, + mipLevels, format, 1, 0, layout, flags ); + } + static inline CD3DX12_RESOURCE_DESC Tex2D( + DXGI_FORMAT format, + UINT64 width, + UINT height, + UINT16 arraySize = 1, + UINT16 mipLevels = 0, + UINT sampleCount = 1, + UINT sampleQuality = 0, + D3D12_RESOURCE_FLAGS flags = D3D12_RESOURCE_FLAG_NONE, + D3D12_TEXTURE_LAYOUT layout = D3D12_TEXTURE_LAYOUT_UNKNOWN, + UINT64 alignment = 0 ) + { + return CD3DX12_RESOURCE_DESC( D3D12_RESOURCE_DIMENSION_TEXTURE2D, alignment, width, height, arraySize, + mipLevels, format, sampleCount, sampleQuality, layout, flags ); + } + static inline CD3DX12_RESOURCE_DESC Tex3D( + DXGI_FORMAT format, + UINT64 width, + UINT height, + UINT16 depth, + UINT16 mipLevels = 0, + D3D12_RESOURCE_FLAGS flags = D3D12_RESOURCE_FLAG_NONE, + D3D12_TEXTURE_LAYOUT layout = D3D12_TEXTURE_LAYOUT_UNKNOWN, + UINT64 alignment = 0 ) + { + return CD3DX12_RESOURCE_DESC( D3D12_RESOURCE_DIMENSION_TEXTURE3D, alignment, width, height, depth, + mipLevels, format, 1, 0, layout, flags ); + } + inline UINT16 Depth() const + { return (Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D ? DepthOrArraySize : 1); } + inline UINT16 ArraySize() const + { return (Dimension != D3D12_RESOURCE_DIMENSION_TEXTURE3D ? DepthOrArraySize : 1); } + inline UINT8 PlaneCount(_In_ ID3D12Device* pDevice) const + { return D3D12GetFormatPlaneCount(pDevice, Format); } + inline UINT Subresources(_In_ ID3D12Device* pDevice) const + { return MipLevels * ArraySize() * PlaneCount(pDevice); } + inline UINT CalcSubresource(UINT MipSlice, UINT ArraySlice, UINT PlaneSlice) + { return D3D12CalcSubresource(MipSlice, ArraySlice, PlaneSlice, MipLevels, ArraySize()); } + operator const D3D12_RESOURCE_DESC&() const { return *this; } +}; +inline bool operator==( const D3D12_RESOURCE_DESC& l, const D3D12_RESOURCE_DESC& r ) +{ + return l.Dimension == r.Dimension && + l.Alignment == r.Alignment && + l.Width == r.Width && + l.Height == r.Height && + l.DepthOrArraySize == r.DepthOrArraySize && + l.MipLevels == r.MipLevels && + l.Format == r.Format && + l.SampleDesc.Count == r.SampleDesc.Count && + l.SampleDesc.Quality == r.SampleDesc.Quality && + l.Layout == r.Layout && + l.Flags == r.Flags; +} +inline bool operator!=( const D3D12_RESOURCE_DESC& l, const D3D12_RESOURCE_DESC& r ) +{ return !( l == r ); } + +//------------------------------------------------------------------------------------------------ +// Row-by-row memcpy +inline void MemcpySubresource( + _In_ const D3D12_MEMCPY_DEST* pDest, + _In_ const D3D12_SUBRESOURCE_DATA* pSrc, + SIZE_T RowSizeInBytes, + UINT NumRows, + UINT NumSlices) +{ + for (UINT z = 0; z < NumSlices; ++z) + { + BYTE* pDestSlice = reinterpret_cast(pDest->pData) + pDest->SlicePitch * z; + const BYTE* pSrcSlice = reinterpret_cast(pSrc->pData) + pSrc->SlicePitch * z; + for (UINT y = 0; y < NumRows; ++y) + { + memcpy(pDestSlice + pDest->RowPitch * y, + pSrcSlice + pSrc->RowPitch * y, + RowSizeInBytes); + } + } +} + +//------------------------------------------------------------------------------------------------ +// Returns required size of a buffer to be used for data upload +inline UINT64 GetRequiredIntermediateSize( + _In_ ID3D12Resource* pDestinationResource, + _In_range_(0,D3D12_REQ_SUBRESOURCES) UINT FirstSubresource, + _In_range_(0,D3D12_REQ_SUBRESOURCES-FirstSubresource) UINT NumSubresources) +{ + D3D12_RESOURCE_DESC Desc = pDestinationResource->GetDesc(); + UINT64 RequiredSize = 0; + + ID3D12Device* pDevice; + pDestinationResource->GetDevice(__uuidof(*pDevice), reinterpret_cast(&pDevice)); + pDevice->GetCopyableFootprints(&Desc, FirstSubresource, NumSubresources, 0, nullptr, nullptr, nullptr, &RequiredSize); + pDevice->Release(); + + return RequiredSize; +} + +//------------------------------------------------------------------------------------------------ +// All arrays must be populated (e.g. by calling GetCopyableFootprints) +inline UINT64 UpdateSubresources( + _In_ ID3D12GraphicsCommandList* pCmdList, + _In_ ID3D12Resource* pDestinationResource, + _In_ ID3D12Resource* pIntermediate, + _In_range_(0,D3D12_REQ_SUBRESOURCES) UINT FirstSubresource, + _In_range_(0,D3D12_REQ_SUBRESOURCES-FirstSubresource) UINT NumSubresources, + UINT64 RequiredSize, + _In_reads_(NumSubresources) const D3D12_PLACED_SUBRESOURCE_FOOTPRINT* pLayouts, + _In_reads_(NumSubresources) const UINT* pNumRows, + _In_reads_(NumSubresources) const UINT64* pRowSizesInBytes, + _In_reads_(NumSubresources) const D3D12_SUBRESOURCE_DATA* pSrcData) +{ + // Minor validation + D3D12_RESOURCE_DESC IntermediateDesc = pIntermediate->GetDesc(); + D3D12_RESOURCE_DESC DestinationDesc = pDestinationResource->GetDesc(); + if (IntermediateDesc.Dimension != D3D12_RESOURCE_DIMENSION_BUFFER || + IntermediateDesc.Width < RequiredSize + pLayouts[0].Offset || + RequiredSize > (SIZE_T)-1 || + (DestinationDesc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER && + (FirstSubresource != 0 || NumSubresources != 1))) + { + return 0; + } + + BYTE* pData; + HRESULT hr = pIntermediate->Map(0, NULL, reinterpret_cast(&pData)); + if (FAILED(hr)) + { + return 0; + } + + for (UINT i = 0; i < NumSubresources; ++i) + { + if (pRowSizesInBytes[i] > (SIZE_T)-1) return 0; + D3D12_MEMCPY_DEST DestData = { pData + pLayouts[i].Offset, pLayouts[i].Footprint.RowPitch, pLayouts[i].Footprint.RowPitch * pNumRows[i] }; + MemcpySubresource(&DestData, &pSrcData[i], (SIZE_T)pRowSizesInBytes[i], pNumRows[i], pLayouts[i].Footprint.Depth); + } + pIntermediate->Unmap(0, NULL); + + if (DestinationDesc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) + { + CD3DX12_BOX SrcBox( UINT( pLayouts[0].Offset ), UINT( pLayouts[0].Offset + pLayouts[0].Footprint.Width ) ); + pCmdList->CopyBufferRegion( + pDestinationResource, 0, pIntermediate, pLayouts[0].Offset, pLayouts[0].Footprint.Width); + } + else + { + for (UINT i = 0; i < NumSubresources; ++i) + { + CD3DX12_TEXTURE_COPY_LOCATION Dst(pDestinationResource, i + FirstSubresource); + CD3DX12_TEXTURE_COPY_LOCATION Src(pIntermediate, pLayouts[i]); + pCmdList->CopyTextureRegion(&Dst, 0, 0, 0, &Src, nullptr); + } + } + return RequiredSize; +} + +//------------------------------------------------------------------------------------------------ +// Heap-allocating UpdateSubresources implementation +inline UINT64 UpdateSubresources( + _In_ ID3D12GraphicsCommandList* pCmdList, + _In_ ID3D12Resource* pDestinationResource, + _In_ ID3D12Resource* pIntermediate, + UINT64 IntermediateOffset, + _In_range_(0,D3D12_REQ_SUBRESOURCES) UINT FirstSubresource, + _In_range_(0,D3D12_REQ_SUBRESOURCES-FirstSubresource) UINT NumSubresources, + _In_reads_(NumSubresources) D3D12_SUBRESOURCE_DATA* pSrcData) +{ + UINT64 RequiredSize = 0; + UINT64 MemToAlloc = static_cast(sizeof(D3D12_PLACED_SUBRESOURCE_FOOTPRINT) + sizeof(UINT) + sizeof(UINT64)) * NumSubresources; + if (MemToAlloc > SIZE_MAX) + { + return 0; + } + void* pMem = HeapAlloc(GetProcessHeap(), 0, static_cast(MemToAlloc)); + if (pMem == NULL) + { + return 0; + } + D3D12_PLACED_SUBRESOURCE_FOOTPRINT* pLayouts = reinterpret_cast(pMem); + UINT64* pRowSizesInBytes = reinterpret_cast(pLayouts + NumSubresources); + UINT* pNumRows = reinterpret_cast(pRowSizesInBytes + NumSubresources); + + D3D12_RESOURCE_DESC Desc = pDestinationResource->GetDesc(); + ID3D12Device* pDevice; + pDestinationResource->GetDevice(__uuidof(*pDevice), reinterpret_cast(&pDevice)); + pDevice->GetCopyableFootprints(&Desc, FirstSubresource, NumSubresources, IntermediateOffset, pLayouts, pNumRows, pRowSizesInBytes, &RequiredSize); + pDevice->Release(); + + UINT64 Result = UpdateSubresources(pCmdList, pDestinationResource, pIntermediate, FirstSubresource, NumSubresources, RequiredSize, pLayouts, pNumRows, pRowSizesInBytes, pSrcData); + HeapFree(GetProcessHeap(), 0, pMem); + return Result; +} + +//------------------------------------------------------------------------------------------------ +// Stack-allocating UpdateSubresources implementation +template +inline UINT64 UpdateSubresources( + _In_ ID3D12GraphicsCommandList* pCmdList, + _In_ ID3D12Resource* pDestinationResource, + _In_ ID3D12Resource* pIntermediate, + UINT64 IntermediateOffset, + _In_range_(0, MaxSubresources) UINT FirstSubresource, + _In_range_(1, MaxSubresources - FirstSubresource) UINT NumSubresources, + _In_reads_(NumSubresources) D3D12_SUBRESOURCE_DATA* pSrcData) +{ + UINT64 RequiredSize = 0; + D3D12_PLACED_SUBRESOURCE_FOOTPRINT Layouts[MaxSubresources]; + UINT NumRows[MaxSubresources]; + UINT64 RowSizesInBytes[MaxSubresources]; + + D3D12_RESOURCE_DESC Desc = pDestinationResource->GetDesc(); + ID3D12Device* pDevice; + pDestinationResource->GetDevice(__uuidof(*pDevice), reinterpret_cast(&pDevice)); + pDevice->GetCopyableFootprints(&Desc, FirstSubresource, NumSubresources, IntermediateOffset, Layouts, NumRows, RowSizesInBytes, &RequiredSize); + pDevice->Release(); + + return UpdateSubresources(pCmdList, pDestinationResource, pIntermediate, FirstSubresource, NumSubresources, RequiredSize, Layouts, NumRows, RowSizesInBytes, pSrcData); +} + +//------------------------------------------------------------------------------------------------ +inline bool D3D12IsLayoutOpaque( D3D12_TEXTURE_LAYOUT Layout ) +{ return Layout == D3D12_TEXTURE_LAYOUT_UNKNOWN || Layout == D3D12_TEXTURE_LAYOUT_64KB_UNDEFINED_SWIZZLE; } + +//------------------------------------------------------------------------------------------------ +inline ID3D12CommandList * const * CommandListCast(ID3D12GraphicsCommandList * const * pp) +{ + // This cast is useful for passing strongly typed command list pointers into + // ExecuteCommandLists. + // This cast is valid as long as the const-ness is respected. D3D12 APIs do + // respect the const-ness of their arguments. + return reinterpret_cast(pp); +} + + +#endif // defined( __cplusplus ) + +#endif //__D3DX12_H__ + + + diff --git a/Samples/simpleD3D12/shaders.hlsl b/Samples/simpleD3D12/shaders.hlsl new file mode 100755 index 00000000..caee67c5 --- /dev/null +++ b/Samples/simpleD3D12/shaders.hlsl @@ -0,0 +1,34 @@ +/* +* Copyright 1993-2018 NVIDIA Corporation. All rights reserved. +* +* Please refer to the NVIDIA end user license agreement (EULA) associated +* with this source code for terms and conditions that govern your use of +* this software. Any use, reproduction, disclosure, or distribution of +* this software and related documentation outside the terms of the EULA +* is strictly prohibited. +* +*/ + + +struct PSInput +{ + float4 position : SV_POSITION; + float4 color : COLOR; +}; + +PSInput VSMain(float3 position : POSITION, float4 color : COLOR) +{ + PSInput result; + + result.position = float4(position, 1.0f); + + // Pass the color through without modification. + result.color = color; + + return result; +} + +float4 PSMain(PSInput input) : SV_TARGET +{ + return input.color; +} diff --git a/Samples/simpleD3D12/simpleD3D12.cpp b/Samples/simpleD3D12/simpleD3D12.cpp new file mode 100755 index 00000000..a32b2110 --- /dev/null +++ b/Samples/simpleD3D12/simpleD3D12.cpp @@ -0,0 +1,509 @@ +/* +* Copyright 1993-2018 NVIDIA Corporation. All rights reserved. +* +* Please refer to the NVIDIA end user license agreement (EULA) associated +* with this source code for terms and conditions that govern your use of +* this software. Any use, reproduction, disclosure, or distribution of +* this software and related documentation outside the terms of the EULA +* is strictly prohibited. +* +*/ + + +#include "stdafx.h" +#include "simpleD3D12.h" +#include + +////////////////////////////////////////////// +// WindowsSecurityAttributes implementation // +////////////////////////////////////////////// + +class WindowsSecurityAttributes { +protected: + SECURITY_ATTRIBUTES m_winSecurityAttributes; + PSECURITY_DESCRIPTOR m_winPSecurityDescriptor; + +public: + WindowsSecurityAttributes(); + ~WindowsSecurityAttributes(); + SECURITY_ATTRIBUTES * operator&(); +}; + +WindowsSecurityAttributes::WindowsSecurityAttributes() +{ + m_winPSecurityDescriptor = (PSECURITY_DESCRIPTOR)calloc(1, SECURITY_DESCRIPTOR_MIN_LENGTH + 2 * sizeof(void**)); + assert(m_winPSecurityDescriptor != (PSECURITY_DESCRIPTOR)NULL); + + PSID *ppSID = (PSID *)((PBYTE)m_winPSecurityDescriptor + SECURITY_DESCRIPTOR_MIN_LENGTH); + PACL *ppACL = (PACL *)((PBYTE)ppSID + sizeof(PSID *)); + + InitializeSecurityDescriptor(m_winPSecurityDescriptor, SECURITY_DESCRIPTOR_REVISION); + + SID_IDENTIFIER_AUTHORITY sidIdentifierAuthority = SECURITY_WORLD_SID_AUTHORITY; + AllocateAndInitializeSid(&sidIdentifierAuthority, 1, SECURITY_WORLD_RID, 0, 0, 0, 0, 0, 0, 0, ppSID); + + EXPLICIT_ACCESS explicitAccess; + ZeroMemory(&explicitAccess, sizeof(EXPLICIT_ACCESS)); + explicitAccess.grfAccessPermissions = STANDARD_RIGHTS_ALL | SPECIFIC_RIGHTS_ALL; + explicitAccess.grfAccessMode = SET_ACCESS; + explicitAccess.grfInheritance = INHERIT_ONLY; + explicitAccess.Trustee.TrusteeForm = TRUSTEE_IS_SID; + explicitAccess.Trustee.TrusteeType = TRUSTEE_IS_WELL_KNOWN_GROUP; + explicitAccess.Trustee.ptstrName = (LPTSTR)*ppSID; + + SetEntriesInAcl(1, &explicitAccess, NULL, ppACL); + + SetSecurityDescriptorDacl(m_winPSecurityDescriptor, TRUE, *ppACL, FALSE); + + m_winSecurityAttributes.nLength = sizeof(m_winSecurityAttributes); + m_winSecurityAttributes.lpSecurityDescriptor = m_winPSecurityDescriptor; + m_winSecurityAttributes.bInheritHandle = TRUE; +} + +WindowsSecurityAttributes::~WindowsSecurityAttributes() +{ + PSID* ppSID = (PSID*)((PBYTE)m_winPSecurityDescriptor + SECURITY_DESCRIPTOR_MIN_LENGTH); + PACL* ppACL = (PACL*)((PBYTE)ppSID + sizeof(PSID*)); + + if (*ppSID) { + FreeSid(*ppSID); + } + if (*ppACL) { + LocalFree(*ppACL); + } + free(m_winPSecurityDescriptor); +} + +SECURITY_ATTRIBUTES * +WindowsSecurityAttributes::operator&() +{ + return &m_winSecurityAttributes; +} + +DX12CudaInterop::DX12CudaInterop(UINT width, UINT height, std::string name) : + DX12CudaSample(width, height, name), + m_frameIndex(0), + m_scissorRect(0, 0, static_cast(width), static_cast(height)), + m_fenceValues{}, + m_rtvDescriptorSize(0) +{ + m_viewport = { 0.0f, 0.0f, static_cast(width), static_cast(height) }; + m_AnimTime = 1.0f; +} + +void DX12CudaInterop::OnInit() +{ + LoadPipeline(); + InitCuda(); + LoadAssets(); +} + +// Load the rendering pipeline dependencies. +void DX12CudaInterop::LoadPipeline() +{ + UINT dxgiFactoryFlags = 0; + +#if defined(_DEBUG) + // Enable the debug layer (requires the Graphics Tools "optional feature"). + // NOTE: Enabling the debug layer after device creation will invalidate the active device. + { + ComPtr debugController; + if (SUCCEEDED(D3D12GetDebugInterface(IID_PPV_ARGS(&debugController)))) + { + debugController->EnableDebugLayer(); + + // Enable additional debug layers. + dxgiFactoryFlags |= DXGI_CREATE_FACTORY_DEBUG; + } + } +#endif + + ComPtr factory; + ThrowIfFailed(CreateDXGIFactory2(dxgiFactoryFlags, IID_PPV_ARGS(&factory))); + + if (m_useWarpDevice) + { + ComPtr warpAdapter; + ThrowIfFailed(factory->EnumWarpAdapter(IID_PPV_ARGS(&warpAdapter))); + + ThrowIfFailed(D3D12CreateDevice( + warpAdapter.Get(), + D3D_FEATURE_LEVEL_11_0, + IID_PPV_ARGS(&m_device) + )); + } + else + { + ComPtr hardwareAdapter; + GetHardwareAdapter(factory.Get(), &hardwareAdapter); + + ThrowIfFailed(D3D12CreateDevice( + hardwareAdapter.Get(), + D3D_FEATURE_LEVEL_11_0, + IID_PPV_ARGS(&m_device) + )); + DXGI_ADAPTER_DESC1 desc; + hardwareAdapter->GetDesc1(&desc); + m_dx12deviceluid = desc.AdapterLuid; + } + + // Describe and create the command queue. + D3D12_COMMAND_QUEUE_DESC queueDesc = {}; + queueDesc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE; + queueDesc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT; + + ThrowIfFailed(m_device->CreateCommandQueue(&queueDesc, IID_PPV_ARGS(&m_commandQueue))); + + // Describe and create the swap chain. + DXGI_SWAP_CHAIN_DESC1 swapChainDesc = {}; + swapChainDesc.BufferCount = FrameCount; + swapChainDesc.Width = m_width; + swapChainDesc.Height = m_height; + swapChainDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + swapChainDesc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; + swapChainDesc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD; + swapChainDesc.SampleDesc.Count = 1; + + ComPtr swapChain; + ThrowIfFailed(factory->CreateSwapChainForHwnd( + m_commandQueue.Get(), // Swap chain needs the queue so that it can force a flush on it. + Win32Application::GetHwnd(), + &swapChainDesc, + nullptr, + nullptr, + &swapChain + )); + + // This sample does not support fullscreen transitions. + ThrowIfFailed(factory->MakeWindowAssociation(Win32Application::GetHwnd(), DXGI_MWA_NO_ALT_ENTER)); + + ThrowIfFailed(swapChain.As(&m_swapChain)); + m_frameIndex = m_swapChain->GetCurrentBackBufferIndex(); + + // Create descriptor heaps. + { + // Describe and create a render target view (RTV) descriptor heap. + D3D12_DESCRIPTOR_HEAP_DESC rtvHeapDesc = {}; + rtvHeapDesc.NumDescriptors = FrameCount; + rtvHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_RTV; + rtvHeapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE; + ThrowIfFailed(m_device->CreateDescriptorHeap(&rtvHeapDesc, IID_PPV_ARGS(&m_rtvHeap))); + + m_rtvDescriptorSize = m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_RTV); + } + + // Create frame resources. + { + CD3DX12_CPU_DESCRIPTOR_HANDLE rtvHandle(m_rtvHeap->GetCPUDescriptorHandleForHeapStart()); + + // Create a RTV and a command allocator for each frame. + for (UINT n = 0; n < FrameCount; n++) + { + ThrowIfFailed(m_swapChain->GetBuffer(n, IID_PPV_ARGS(&m_renderTargets[n]))); + m_device->CreateRenderTargetView(m_renderTargets[n].Get(), nullptr, rtvHandle); + rtvHandle.Offset(1, m_rtvDescriptorSize); + + ThrowIfFailed(m_device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&m_commandAllocators[n]))); + } + } +} + +void DX12CudaInterop::InitCuda() +{ + int num_cuda_devices = 0; + checkCudaErrors(cudaGetDeviceCount(&num_cuda_devices)); + + if (!num_cuda_devices) + { + throw std::exception("No CUDA Devices found"); + } + for (UINT devId = 0; devId < num_cuda_devices; devId++) + { + cudaDeviceProp devProp; + checkCudaErrors(cudaGetDeviceProperties(&devProp, devId)); + + if ((memcmp(&m_dx12deviceluid.LowPart, devProp.luid, sizeof(m_dx12deviceluid.LowPart)) == 0) && (memcmp(&m_dx12deviceluid.HighPart, devProp.luid + sizeof(m_dx12deviceluid.LowPart), sizeof(m_dx12deviceluid.HighPart)) == 0)) + { + checkCudaErrors(cudaSetDevice(devId)); + m_cudaDeviceID = devId; + m_nodeMask = devProp.luidDeviceNodeMask; + checkCudaErrors(cudaStreamCreate(&m_streamToRun)); + printf("CUDA Device Used [%d] %s\n", devId, devProp.name); + break; + } + } +} +// Load the sample assets. +void DX12CudaInterop::LoadAssets() +{ + // Create a root signature. + { + CD3DX12_DESCRIPTOR_RANGE range; + CD3DX12_ROOT_PARAMETER parameter; + + range.Init(D3D12_DESCRIPTOR_RANGE_TYPE_CBV, 1, 0); + parameter.InitAsDescriptorTable(1, &range, D3D12_SHADER_VISIBILITY_VERTEX); + + D3D12_ROOT_SIGNATURE_FLAGS rootSignatureFlags = + D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT | // Only the input assembler stage needs access to the constant buffer. + D3D12_ROOT_SIGNATURE_FLAG_DENY_DOMAIN_SHADER_ROOT_ACCESS | + D3D12_ROOT_SIGNATURE_FLAG_DENY_GEOMETRY_SHADER_ROOT_ACCESS | + D3D12_ROOT_SIGNATURE_FLAG_DENY_HULL_SHADER_ROOT_ACCESS | + D3D12_ROOT_SIGNATURE_FLAG_DENY_PIXEL_SHADER_ROOT_ACCESS; + + CD3DX12_ROOT_SIGNATURE_DESC descRootSignature; + descRootSignature.Init(1, ¶meter, 0, nullptr, rootSignatureFlags); + ComPtr pSignature; + ComPtr pError; + ThrowIfFailed(D3D12SerializeRootSignature(&descRootSignature, D3D_ROOT_SIGNATURE_VERSION_1, pSignature.GetAddressOf(), pError.GetAddressOf())); + ThrowIfFailed(m_device->CreateRootSignature(0, pSignature->GetBufferPointer(), pSignature->GetBufferSize(), IID_PPV_ARGS(&m_rootSignature))); + } + // Create the pipeline state, which includes compiling and loading shaders. + { + ComPtr vertexShader; + ComPtr pixelShader; + +#if defined(_DEBUG) + // Enable better shader debugging with the graphics debugging tools. + UINT compileFlags = D3DCOMPILE_DEBUG | D3DCOMPILE_SKIP_OPTIMIZATION; +#else + UINT compileFlags = 0; +#endif + std::wstring filePath = GetAssetFullPath("shaders.hlsl"); + LPCWSTR result = filePath.c_str(); + ThrowIfFailed(D3DCompileFromFile(result, nullptr, nullptr, "VSMain", "vs_5_0", compileFlags, 0, &vertexShader, nullptr)); + ThrowIfFailed(D3DCompileFromFile(result, nullptr, nullptr, "PSMain", "ps_5_0", compileFlags, 0, &pixelShader, nullptr)); + + // Define the vertex input layout. + D3D12_INPUT_ELEMENT_DESC inputElementDescs[] = + { + { "POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 0, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 }, + { "COLOR", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, 12, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 } + }; + + // Describe and create the graphics pipeline state object (PSO). + D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = {}; + psoDesc.InputLayout = { inputElementDescs, _countof(inputElementDescs) }; + psoDesc.pRootSignature = m_rootSignature.Get(); + psoDesc.VS = CD3DX12_SHADER_BYTECODE(vertexShader.Get()); + psoDesc.PS = CD3DX12_SHADER_BYTECODE(pixelShader.Get()); + psoDesc.RasterizerState = CD3DX12_RASTERIZER_DESC(D3D12_DEFAULT); + psoDesc.BlendState = CD3DX12_BLEND_DESC(D3D12_DEFAULT); + psoDesc.DepthStencilState = CD3DX12_DEPTH_STENCIL_DESC(D3D12_DEFAULT); + psoDesc.SampleMask = UINT_MAX; + psoDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT; + psoDesc.NumRenderTargets = 1; + psoDesc.RTVFormats[0] = DXGI_FORMAT_R8G8B8A8_UNORM; + psoDesc.SampleDesc.Count = 1; + ThrowIfFailed(m_device->CreateGraphicsPipelineState(&psoDesc, IID_PPV_ARGS(&m_pipelineState))); + } + + // Create the command list. + ThrowIfFailed(m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, m_commandAllocators[m_frameIndex].Get(), m_pipelineState.Get(), IID_PPV_ARGS(&m_commandList))); + + // Command lists are created in the recording state, but there is nothing + // to record yet. The main loop expects it to be closed, so close it now. + ThrowIfFailed(m_commandList->Close()); + + // Create the vertex buffer. + { + // Define the geometry for a triangle. + vertBufWidth = m_width/2; + vertBufHeight = m_height/2; + const UINT vertexBufferSize = sizeof(Vertex)*vertBufWidth*vertBufHeight; + + ThrowIfFailed(m_device->CreateCommittedResource( + &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT), + D3D12_HEAP_FLAG_SHARED, + &CD3DX12_RESOURCE_DESC::Buffer(vertexBufferSize), + D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER, + nullptr, + IID_PPV_ARGS(&m_vertexBuffer))); + + // Initialize the vertex buffer view. + m_vertexBufferView.BufferLocation = m_vertexBuffer->GetGPUVirtualAddress(); + m_vertexBufferView.StrideInBytes = sizeof(Vertex); + m_vertexBufferView.SizeInBytes = vertexBufferSize; + + HANDLE sharedHandle; + WindowsSecurityAttributes windowsSecurityAttributes; + LPCWSTR name = NULL; + ThrowIfFailed(m_device->CreateSharedHandle(m_vertexBuffer.Get(), &windowsSecurityAttributes, GENERIC_ALL, name, &sharedHandle)); + + D3D12_RESOURCE_ALLOCATION_INFO d3d12ResourceAllocationInfo; + d3d12ResourceAllocationInfo = m_device->GetResourceAllocationInfo(m_nodeMask, 1, &CD3DX12_RESOURCE_DESC::Buffer(vertexBufferSize)); + size_t actualSize = d3d12ResourceAllocationInfo.SizeInBytes; + size_t alignment = d3d12ResourceAllocationInfo.Alignment; + + cudaExternalMemoryHandleDesc externalMemoryHandleDesc; + memset(&externalMemoryHandleDesc, 0, sizeof(externalMemoryHandleDesc)); + + externalMemoryHandleDesc.type = cudaExternalMemoryHandleTypeD3D12Resource; + externalMemoryHandleDesc.handle.win32.handle = sharedHandle; + externalMemoryHandleDesc.size = actualSize; + externalMemoryHandleDesc.flags = cudaExternalMemoryDedicated; + + checkCudaErrors(cudaImportExternalMemory(&m_externalMemory, &externalMemoryHandleDesc)); + + cudaExternalMemoryBufferDesc externalMemoryBufferDesc; + memset(&externalMemoryBufferDesc, 0, sizeof(externalMemoryBufferDesc)); + externalMemoryBufferDesc.offset = 0; + externalMemoryBufferDesc.size = vertexBufferSize; + externalMemoryBufferDesc.flags = 0; + + checkCudaErrors(cudaExternalMemoryGetMappedBuffer(&m_cudaDevVertptr, m_externalMemory, &externalMemoryBufferDesc)); + RunSineWaveKernel(vertBufWidth, vertBufHeight, (Vertex *)m_cudaDevVertptr, m_streamToRun, 1.0f); + checkCudaErrors(cudaStreamSynchronize(m_streamToRun)); + + } + + // Create synchronization objects and wait until assets have been uploaded to the GPU. + { + ThrowIfFailed(m_device->CreateFence(m_fenceValues[m_frameIndex], D3D12_FENCE_FLAG_SHARED, IID_PPV_ARGS(&m_fence))); + + cudaExternalSemaphoreHandleDesc externalSemaphoreHandleDesc; + + memset(&externalSemaphoreHandleDesc, 0, sizeof(externalSemaphoreHandleDesc)); + WindowsSecurityAttributes windowsSecurityAttributes; + LPCWSTR name = NULL; + HANDLE sharedHandle; + externalSemaphoreHandleDesc.type = cudaExternalSemaphoreHandleTypeD3D12Fence; + m_device->CreateSharedHandle(m_fence.Get(), &windowsSecurityAttributes, GENERIC_ALL, name, &sharedHandle); + externalSemaphoreHandleDesc.handle.win32.handle = (void *)sharedHandle; + externalSemaphoreHandleDesc.flags = 0; + + checkCudaErrors(cudaImportExternalSemaphore(&m_externalSemaphore, &externalSemaphoreHandleDesc)); + + m_fenceValues[m_frameIndex]++; + + // Create an event handle to use for frame synchronization. + m_fenceEvent = CreateEvent(nullptr, FALSE, FALSE, nullptr); + if (m_fenceEvent == nullptr) + { + ThrowIfFailed(HRESULT_FROM_WIN32(GetLastError())); + } + + // Wait for the command list to execute; we are reusing the same command + // list in our main loop but for now, we just want to wait for setup to + // complete before continuing. + WaitForGpu(); + } +} + +// Render the scene. +void DX12CudaInterop::OnRender() +{ + // Record all the commands we need to render the scene into the command list. + PopulateCommandList(); + + // Execute the command list. + ID3D12CommandList* ppCommandLists[] = { m_commandList.Get() }; + m_commandQueue->ExecuteCommandLists(_countof(ppCommandLists), ppCommandLists); + + // Present the frame. + ThrowIfFailed(m_swapChain->Present(1, 0)); + + // Schedule a Signal command in the queue. + const UINT64 currentFenceValue = m_fenceValues[m_frameIndex]; + ThrowIfFailed(m_commandQueue->Signal(m_fence.Get(), currentFenceValue)); + + MoveToNextFrame(); +} + +void DX12CudaInterop::OnDestroy() +{ + // Ensure that the GPU is no longer referencing resources that are about to be + // cleaned up by the destructor. + WaitForGpu(); + checkCudaErrors(cudaDestroyExternalSemaphore(m_externalSemaphore)); + checkCudaErrors(cudaDestroyExternalMemory(m_externalMemory)); + CloseHandle(m_fenceEvent); +} + +void DX12CudaInterop::PopulateCommandList() +{ + // Command list allocators can only be reset when the associated + // command lists have finished execution on the GPU; apps should use + // fences to determine GPU execution progress. + ThrowIfFailed(m_commandAllocators[m_frameIndex]->Reset()); + + // However, when ExecuteCommandList() is called on a particular command + // list, that command list can then be reset at any time and must be before + // re-recording. + ThrowIfFailed(m_commandList->Reset(m_commandAllocators[m_frameIndex].Get(), m_pipelineState.Get())); + + m_commandList->SetGraphicsRootSignature(m_rootSignature.Get()); + + // Set necessary state. + m_commandList->RSSetViewports(1, &m_viewport); + m_commandList->RSSetScissorRects(1, &m_scissorRect); + + // Indicate that the back buffer will be used as a render target. + m_commandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_renderTargets[m_frameIndex].Get(), D3D12_RESOURCE_STATE_PRESENT, D3D12_RESOURCE_STATE_RENDER_TARGET)); + + CD3DX12_CPU_DESCRIPTOR_HANDLE rtvHandle(m_rtvHeap->GetCPUDescriptorHandleForHeapStart(), m_frameIndex, m_rtvDescriptorSize); + m_commandList->OMSetRenderTargets(1, &rtvHandle, FALSE, nullptr); + + // Record commands. + const float clearColor[] = { 0.0f, 0.2f, 0.4f, 1.0f }; + m_commandList->ClearRenderTargetView(rtvHandle, clearColor, 0, nullptr); + m_commandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_POINTLIST); + m_commandList->IASetVertexBuffers(0, 1, &m_vertexBufferView); + m_commandList->DrawInstanced(vertBufHeight*vertBufWidth, 1, 0, 0); + + // Indicate that the back buffer will now be used to present. + m_commandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_renderTargets[m_frameIndex].Get(), D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_PRESENT)); + + ThrowIfFailed(m_commandList->Close()); +} + +// Wait for pending GPU work to complete. +void DX12CudaInterop::WaitForGpu() +{ + // Schedule a Signal command in the queue. + ThrowIfFailed(m_commandQueue->Signal(m_fence.Get(), m_fenceValues[m_frameIndex])); + + // Wait until the fence has been processed. + ThrowIfFailed(m_fence->SetEventOnCompletion(m_fenceValues[m_frameIndex], m_fenceEvent)); + WaitForSingleObjectEx(m_fenceEvent, INFINITE, FALSE); + + // Increment the fence value for the current frame. + m_fenceValues[m_frameIndex]++; +} + +// Prepare to render the next frame. +void DX12CudaInterop::MoveToNextFrame() +{ + const UINT64 currentFenceValue = m_fenceValues[m_frameIndex]; + cudaExternalSemaphoreWaitParams externalSemaphoreWaitParams; + memset(&externalSemaphoreWaitParams, 0, sizeof(externalSemaphoreWaitParams)); + + externalSemaphoreWaitParams.params.fence.value = currentFenceValue; + externalSemaphoreWaitParams.flags = 0; + + checkCudaErrors(cudaWaitExternalSemaphoresAsync(&m_externalSemaphore, &externalSemaphoreWaitParams, 1, m_streamToRun)); + + m_AnimTime += 0.01f; + RunSineWaveKernel(vertBufWidth, vertBufHeight, (Vertex *)m_cudaDevVertptr, m_streamToRun, m_AnimTime); + + cudaExternalSemaphoreSignalParams externalSemaphoreSignalParams; + memset(&externalSemaphoreSignalParams, 0, sizeof(externalSemaphoreSignalParams)); + m_fenceValues[m_frameIndex] = currentFenceValue + 1; + externalSemaphoreSignalParams.params.fence.value = m_fenceValues[m_frameIndex]; + externalSemaphoreSignalParams.flags = 0; + + checkCudaErrors(cudaSignalExternalSemaphoresAsync(&m_externalSemaphore, &externalSemaphoreSignalParams, 1, m_streamToRun)); + + // Update the frame index. + m_frameIndex = m_swapChain->GetCurrentBackBufferIndex(); + + // If the next frame is not ready to be rendered yet, wait until it is ready. + if (m_fence->GetCompletedValue() < m_fenceValues[m_frameIndex]) + { + ThrowIfFailed(m_fence->SetEventOnCompletion(m_fenceValues[m_frameIndex], m_fenceEvent)); + WaitForSingleObjectEx(m_fenceEvent, INFINITE, FALSE); + } + + // Set the fence value for the next frame. + m_fenceValues[m_frameIndex] = currentFenceValue + 2; +} diff --git a/Samples/simpleD3D12/simpleD3D12.h b/Samples/simpleD3D12/simpleD3D12.h new file mode 100755 index 00000000..0526d39e --- /dev/null +++ b/Samples/simpleD3D12/simpleD3D12.h @@ -0,0 +1,127 @@ +/* +* Copyright 1993-2018 NVIDIA Corporation. All rights reserved. +* +* Please refer to the NVIDIA end user license agreement (EULA) associated +* with this source code for terms and conditions that govern your use of +* this software. Any use, reproduction, disclosure, or distribution of +* this software and related documentation outside the terms of the EULA +* is strictly prohibited. +* +*/ + +#pragma once + +#include "DX12CudaSample.h" +#include "ShaderStructs.h" + +using namespace DirectX; + +// Note that while ComPtr is used to manage the lifetime of resources on the CPU, +// it has no understanding of the lifetime of resources on the GPU. Apps must account +// for the GPU lifetime of resources to avoid destroying objects that may still be +// referenced by the GPU. +// An example of this can be found in the class method: OnDestroy(). +using Microsoft::WRL::ComPtr; + +static const char *shaderstr = +" struct PSInput \n" \ +" { \n" \ +" float4 position : SV_POSITION; \n" \ +" float4 color : COLOR; \n" \ +" } \n" \ +" PSInput VSMain(float3 position : POSITION, float4 color : COLOR) \n" \ +" { \n" \ +" PSInput result;\n" \ +" result.position = float4(position, 1.0f);\n" \ +" result.color = color;\n" \ +" return result; \n" \ +" } \n" \ +" float4 PSMain(PSInput input) : SV_TARGET \n" \ +" { \n" \ +" return input.color;\n" \ +" } \n"; + +class DX12CudaInterop : public DX12CudaSample +{ +public: + DX12CudaInterop(UINT width, UINT height, std::string name); + + virtual void OnInit(); + virtual void OnRender(); + virtual void OnDestroy(); + +private: + // In this sample we overload the meaning of FrameCount to mean both the maximum + // number of frames that will be queued to the GPU at a time, as well as the number + // of back buffers in the DXGI swap chain. For the majority of applications, this + // is convenient and works well. However, there will be certain cases where an + // application may want to queue up more frames than there are back buffers + // available. + // It should be noted that excessive buffering of frames dependent on user input + // may result in noticeable latency in your app. + static const UINT FrameCount = 2; + std::string shadersSrc = shaderstr; +#if 0 + " struct PSInput \n" \ + " { \n" \ + " float4 position : SV_POSITION; \n" \ + " float4 color : COLOR; \n" \ + " } \n" \ + " PSInput VSMain(float3 position : POSITION, float4 color : COLOR) \n" \ + " { \n" \ + " PSInput result;\n" \ + " result.position = float4(position, 1.0f);\n" \ + " result.color = color;\n" \ + " return result; \n" \ + " } \n" \ + " float4 PSMain(PSInput input) : SV_TARGET \n" \ + " { \n" \ + " return input.color;\n" \ + " } \n"; +#endif + + // Vertex Buffer dimension + size_t vertBufHeight, vertBufWidth; + + // Pipeline objects. + D3D12_VIEWPORT m_viewport; + CD3DX12_RECT m_scissorRect; + ComPtr m_swapChain; + ComPtr m_device; + ComPtr m_renderTargets[FrameCount]; + ComPtr m_commandAllocators[FrameCount]; + ComPtr m_commandQueue; + ComPtr m_rootSignature; + ComPtr m_rtvHeap; + ComPtr m_pipelineState; + ComPtr m_commandList; + UINT m_rtvDescriptorSize; + + // App resources. + ComPtr m_vertexBuffer; + D3D12_VERTEX_BUFFER_VIEW m_vertexBufferView; + + // Synchronization objects. + UINT m_frameIndex; + HANDLE m_fenceEvent; + ComPtr m_fence; + UINT64 m_fenceValues[FrameCount]; + + // CUDA objects + cudaExternalMemoryHandleType m_externalMemoryHandleType; + cudaExternalMemory_t m_externalMemory; + cudaExternalSemaphore_t m_externalSemaphore; + cudaStream_t m_streamToRun; + LUID m_dx12deviceluid; + UINT m_cudaDeviceID; + UINT m_nodeMask; + float m_AnimTime; + void *m_cudaDevVertptr = NULL; + + void LoadPipeline(); + void InitCuda(); + void LoadAssets(); + void PopulateCommandList(); + void MoveToNextFrame(); + void WaitForGpu(); +}; diff --git a/Samples/simpleD3D12/simpleD3D12_vs2015.sln b/Samples/simpleD3D12/simpleD3D12_vs2015.sln new file mode 100644 index 00000000..0d017217 --- /dev/null +++ b/Samples/simpleD3D12/simpleD3D12_vs2015.sln @@ -0,0 +1,20 @@ + +Microsoft Visual Studio Solution File, Format Version 14.00 +# Visual Studio 2015 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "simpleD3D12", "simpleD3D12_vs2015.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Release|x64 = Release|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64 + {997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64 + {997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64 + {997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/Samples/simpleD3D12/simpleD3D12_vs2015.vcxproj b/Samples/simpleD3D12/simpleD3D12_vs2015.vcxproj new file mode 100644 index 00000000..105fa7a9 --- /dev/null +++ b/Samples/simpleD3D12/simpleD3D12_vs2015.vcxproj @@ -0,0 +1,134 @@ + + + + $(VCTargetsPath)\BuildCustomizations + + + + Debug + x64 + + + Release + x64 + + + + {997E0757-EA74-4A4E-A0FC-47D8C8831A15} + simpleD3D12_vs2015 + simpleD3D12 + + + + $([Microsoft.Build.Utilities.ToolLocationHelper]::GetLatestSDKTargetPlatformVersion('Windows', '10.0')) + $(LatestTargetPlatformVersion) + $(WindowsTargetPlatformVersion) + + + + Application + MultiByte + v140 + + + true + + + true + + + + + + + + + + + $(Platform)/$(Configuration)/ + $(IncludePath) + AllRules.ruleset + + + + + ../../bin/win64/$(Configuration)/ + + + + Level3 + WIN32;_MBCS;%(PreprocessorDefinitions) + ./;$(CudaToolkitDir)/include;../../Common;$(DXSDK_DIR)/include; + + + Windows + cudart_static.lib;d3d12.lib;d3dcompiler.lib;dxgi.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + $(CudaToolkitLibDir); + $(OutDir)/simpleD3D12.exe + + + compute_30,sm_30;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75; + -Xcompiler "/wd 4819" %(AdditionalOptions) + ./;../../Common + WIN32 + + + + + Disabled + MultiThreadedDebug + + + true + Default + + + MTd + 64 + + + + + MaxSpeed + MultiThreaded + + + false + UseLinkTimeCodeGeneration + + + MT + 64 + + + + + $(DXSDK_DIR)/Lib/x86;%(AdditionalLibraryDirectories) + + + + + $(DXSDK_DIR)/Lib/$(PlatformName);%(AdditionalLibraryDirectories) + + + + + + + + + + + + + + + + + + + + + + + diff --git a/Samples/simpleD3D12/simpleD3D12_vs2017.sln b/Samples/simpleD3D12/simpleD3D12_vs2017.sln new file mode 100644 index 00000000..d4214caf --- /dev/null +++ b/Samples/simpleD3D12/simpleD3D12_vs2017.sln @@ -0,0 +1,20 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 2017 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "simpleD3D12", "simpleD3D12_vs2017.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Release|x64 = Release|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64 + {997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64 + {997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64 + {997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/Samples/simpleD3D12/simpleD3D12_vs2017.vcxproj b/Samples/simpleD3D12/simpleD3D12_vs2017.vcxproj new file mode 100644 index 00000000..d65aeb29 --- /dev/null +++ b/Samples/simpleD3D12/simpleD3D12_vs2017.vcxproj @@ -0,0 +1,135 @@ + + + + $(VCTargetsPath)\BuildCustomizations + + + + Debug + x64 + + + Release + x64 + + + + {997E0757-EA74-4A4E-A0FC-47D8C8831A15} + simpleD3D12_vs2017 + simpleD3D12 + + + + $([Microsoft.Build.Utilities.ToolLocationHelper]::GetLatestSDKTargetPlatformVersion('Windows', '10.0')) + $(LatestTargetPlatformVersion) + $(WindowsTargetPlatformVersion) + + + + Application + MultiByte + v141 + 10.0.15063.0 + + + true + + + true + + + + + + + + + + + $(Platform)/$(Configuration)/ + $(IncludePath) + AllRules.ruleset + + + + + ../../bin/win64/$(Configuration)/ + + + + Level3 + WIN32;_MBCS;%(PreprocessorDefinitions) + ./;$(CudaToolkitDir)/include;../../Common;$(DXSDK_DIR)/include; + + + Windows + cudart_static.lib;d3d12.lib;d3dcompiler.lib;dxgi.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + $(CudaToolkitLibDir); + $(OutDir)/simpleD3D12.exe + + + compute_30,sm_30;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75; + -Xcompiler "/wd 4819" %(AdditionalOptions) + ./;../../Common + WIN32 + + + + + Disabled + MultiThreadedDebug + + + true + Default + + + MTd + 64 + + + + + MaxSpeed + MultiThreaded + + + false + UseLinkTimeCodeGeneration + + + MT + 64 + + + + + $(DXSDK_DIR)/Lib/x86;%(AdditionalLibraryDirectories) + + + + + $(DXSDK_DIR)/Lib/$(PlatformName);%(AdditionalLibraryDirectories) + + + + + + + + + + + + + + + + + + + + + + + diff --git a/Samples/simpleD3D12/sinewave_cuda.cu b/Samples/simpleD3D12/sinewave_cuda.cu new file mode 100755 index 00000000..7c422ea3 --- /dev/null +++ b/Samples/simpleD3D12/sinewave_cuda.cu @@ -0,0 +1,55 @@ +/* + * Copyright 1993-2018 NVIDIA Corporation. All rights reserved. + * + * Please refer to the NVIDIA end user license agreement (EULA) associated + * with this source code for terms and conditions that govern your use of + * this software. Any use, reproduction, disclosure, or distribution of + * this software and related documentation outside the terms of the EULA + * is strictly prohibited. + * + */ + +#include +#include "ShaderStructs.h" +#include "helper_cuda.h" + +__global__ void sinewave_gen_kernel(Vertex *vertices, unsigned int width, unsigned int height, float time) +{ + unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; + unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; + + // calculate uv coordinates + float u = x / (float) width; + float v = y / (float) height; + u = u*2.0f - 1.0f; + v = v*2.0f - 1.0f; + + // calculate simple sine wave pattern + float freq = 4.0f; + float w = sinf(u*freq + time) * cosf(v*freq + time) * 0.5f; + + if (y < height && x < width) + { + // write output vertex + vertices[y*width+x].position.x = u; + vertices[y*width+x].position.y = w; + vertices[y*width+x].position.z = v; + //vertices[y*width+x].position[3] = 1.0f; + vertices[y*width+x].color.x = 1.0f; + vertices[y*width+x].color.y = 0.0f; + vertices[y*width+x].color.z = 0.0f; + vertices[y*width + x].color.w = 0.0f; + } +} + +// The host CPU Sinewave thread spawner +void RunSineWaveKernel(size_t mesh_width, size_t mesh_height, Vertex *cudaDevVertptr, cudaStream_t streamToRun, float AnimTime) +{ + dim3 block(16, 16, 1); + dim3 grid(mesh_width / 16, mesh_height / 16, 1); + Vertex *vertices = (Vertex*)cudaDevVertptr; + sinewave_gen_kernel<<< grid, block, 0, streamToRun >>>(vertices, mesh_width, mesh_height, AnimTime); + + getLastCudaError("sinewave_gen_kernel execution failed.\n"); +} + diff --git a/Samples/simpleD3D12/stdafx.cpp b/Samples/simpleD3D12/stdafx.cpp new file mode 100755 index 00000000..177ed994 --- /dev/null +++ b/Samples/simpleD3D12/stdafx.cpp @@ -0,0 +1,12 @@ +/* +* Copyright 1993-2018 NVIDIA Corporation. All rights reserved. +* +* Please refer to the NVIDIA end user license agreement (EULA) associated +* with this source code for terms and conditions that govern your use of +* this software. Any use, reproduction, disclosure, or distribution of +* this software and related documentation outside the terms of the EULA +* is strictly prohibited. +* +*/ + +#include "stdafx.h" diff --git a/Samples/simpleD3D12/stdafx.h b/Samples/simpleD3D12/stdafx.h new file mode 100755 index 00000000..bb0d799a --- /dev/null +++ b/Samples/simpleD3D12/stdafx.h @@ -0,0 +1,33 @@ +/* +* Copyright 1993-2018 NVIDIA Corporation. All rights reserved. +* +* Please refer to the NVIDIA end user license agreement (EULA) associated +* with this source code for terms and conditions that govern your use of +* this software. Any use, reproduction, disclosure, or distribution of +* this software and related documentation outside the terms of the EULA +* is strictly prohibited. +* +*/ + + +// stdafx.h : include file for standard system include files, +// or project specific include files that are used frequently, but +// are changed infrequently. + +#pragma once + +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN // Exclude rarely-used stuff from Windows headers. +#endif + +#include + +#include +#include +#include +#include +#include "d3dx12.h" + +#include +#include +#include