-- Add simpleD3D12 sample which demonstrates DX12-CUDA interop

rendering sinewave -- Update the documentation with description about this sample
2025-12-16 02:27:49 +08:00 · 2018-10-04 20:05:23 +05:30 · 2018-10-04 20:05:23 +05:30 · fcb23487a8
commit fcb23487a8
parent 91dc60dd96
22 changed files with 3284 additions and 4 deletions
--- a/3
+++ b/3
@ -23,3 +23,6 @@ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+For additional information on the license terms, see the CUDA EULA at
+https://docs.nvidia.com/cuda/eula/index.html
--- a/README.md
+++ b/README.md
@ -10,6 +10,7 @@ This section describes the release notes for the CUDA Samples on GitHub only.
 *  Added `simpleCudaGraphs`. Demonstrates CUDA Graphs creation, instantiation and launch using Graphs APIs and Stream Capture APIs.
 *  Added `conjugateGradientCudaGraphs`. Demonstrates conjugate gradient solver on GPU using CUBLAS and CUSPARSE library calls captured and called using CUDA Graph APIs.
 *  Added `simpleVulkan`. Demonstrates Vulkan - CUDA Interop.
+*  Added `simpleD3D12`. Demonstrates DX12 - CUDA Interop.
 *  Added `UnifiedMemoryPerf`. Demonstrates performance comparision of various memory types involved in system.
 *  Added `p2pBandwidthLatencyTest`. Demonstrates Peer-To-Peer (P2P) data transfers between pairs of GPUs and computes latency and bandwidth.
 *  Added `systemWideAtomics`. Demonstrates system wide atomic instructions.
@ -129,10 +130,10 @@ The samples makefiles can take advantage of certain options:
 #### Windows
 **[warpAggregatedAtomicsCG](./Samples/warpAggregatedAtomicsCG)** | **[conjugateGradientMultiBlockCG](./Samples/conjugateGradientMultiBlockCG)** | **[shfl_scan](./Samples/shfl_scan)** | **[conjugateGradientCudaGraphs](./Samples/conjugateGradientCudaGraphs)** |
 ---|---|---|---|
-**[simpleCudaGraphs](./Samples/simpleCudaGraphs)** | **[deviceQuery](./Samples/deviceQuery)** | **[simpleVoteIntrinsics](./Samples/simpleVoteIntrinsics)** | **[simpleCUBLASXT](./Samples/simpleCUBLASXT)** |
-**[UnifiedMemoryPerf](./Samples/UnifiedMemoryPerf)** | **[conjugateGradientMultiDeviceCG](./Samples/conjugateGradientMultiDeviceCG)** | **[matrixMulDrv](./Samples/matrixMulDrv)** | **[simpleCUFFT](./Samples/simpleCUFFT)** |
-**[simpleCUBLAS](./Samples/simpleCUBLAS)** | **[cudaTensorCoreGemm](./Samples/cudaTensorCoreGemm)** | **[simpleVulkan](./Samples/simpleVulkan)** | **[vectorAdd_nvrtc](./Samples/vectorAdd_nvrtc)** |
-**[p2pBandwidthLatencyTest](./Samples/p2pBandwidthLatencyTest)** | **[matrixMul](./Samples/matrixMul)** |
+**[simpleD3D12](./Samples/simpleD3D12)** | **[simpleCudaGraphs](./Samples/simpleCudaGraphs)** | **[deviceQuery](./Samples/deviceQuery)** | **[simpleVoteIntrinsics](./Samples/simpleVoteIntrinsics)** |
+**[simpleCUBLASXT](./Samples/simpleCUBLASXT)** | **[UnifiedMemoryPerf](./Samples/UnifiedMemoryPerf)** | **[conjugateGradientMultiDeviceCG](./Samples/conjugateGradientMultiDeviceCG)** | **[matrixMulDrv](./Samples/matrixMulDrv)** |
+**[simpleCUFFT](./Samples/simpleCUFFT)** | **[simpleCUBLAS](./Samples/simpleCUBLAS)** | **[cudaTensorCoreGemm](./Samples/cudaTensorCoreGemm)** | **[simpleVulkan](./Samples/simpleVulkan)** |
+**[vectorAdd_nvrtc](./Samples/vectorAdd_nvrtc)** | **[p2pBandwidthLatencyTest](./Samples/p2pBandwidthLatencyTest)** | **[matrixMul](./Samples/matrixMul)** |

 #### Mac OSX
 **[warpAggregatedAtomicsCG](./Samples/warpAggregatedAtomicsCG)** | **[shfl_scan](./Samples/shfl_scan)** | **[conjugateGradientCudaGraphs](./Samples/conjugateGradientCudaGraphs)** | **[simpleCudaGraphs](./Samples/simpleCudaGraphs)** |
@ -169,6 +170,10 @@ Some samples can only be run on a 64-bit operating system.

 DirectX is a collection of APIs designed to allow development of multimedia applications on Microsoft platforms. For Microsoft platforms, NVIDIA's CUDA Driver supports DirectX. Several CUDA Samples for Windows demonstrates CUDA-DirectX Interoperability, for building such samples one needs to install [Direct X SDK (June 2010 or newer)](http://www.microsoft.com/en-us/download/details.aspx?id=6812) , this is required to be installed on Windows 7, Windows 10 and Windows Server 2008, Other Windows OSes do not need to explicitly install the DirectX SDK.

+#### DirectX12
+
+DirectX 12 is a collection of advanced low-level programming APIs which can reduce driver overhead, designed to allow development of multimedia applications on Microsoft platforms starting with Windows 10 OS onwards. For Microsoft platforms, NVIDIA's CUDA Driver supports DirectX. Few CUDA Samples for Windows demonstrates CUDA-DirectX12 Interoperability, for building such samples one needs to install [Windows 10 SDK or higher](https://developer.microsoft.com/en-us/windows/downloads/windows-10-sdk), with VS 2015 or VS 2017.
+
 #### OpenGL

 OpenGL is a graphics library used for 2D and 3D rendering. On systems which support OpenGL, NVIDIA's OpenGL implementation is provided with the CUDA Driver.
--- a/Samples/simpleD3D12/DX12CudaSample.cpp
+++ b/Samples/simpleD3D12/DX12CudaSample.cpp
@ -0,0 +1,118 @@
+/*
+* Copyright 1993-2018 NVIDIA Corporation.  All rights reserved.
+*
+* Please refer to the NVIDIA end user license agreement (EULA) associated
+* with this source code for terms and conditions that govern your use of
+* this software. Any use, reproduction, disclosure, or distribution of
+* this software and related documentation outside the terms of the EULA
+* is strictly prohibited.
+*
+*/
+
+#include "stdafx.h"
+#include "DX12CudaSample.h"
+#include <helper_string.h>
+
+using namespace Microsoft::WRL;
+
+DX12CudaSample::DX12CudaSample(UINT width, UINT height, std::string name) :
+	m_width(width),
+	m_height(height),
+	m_title(name),
+	m_useWarpDevice(false)
+{
+	m_aspectRatio = static_cast<float>(width) / static_cast<float>(height);
+}
+
+DX12CudaSample::~DX12CudaSample()
+{
+}
+
+std::wstring DX12CudaSample::string2wstring(const std::string& s)
+{
+	int len;
+	int slength = (int)s.length() + 1;
+	len = MultiByteToWideChar(CP_ACP, 0, s.c_str(), slength, 0, 0);
+	wchar_t* buf = new wchar_t[len];
+	MultiByteToWideChar(CP_ACP, 0, s.c_str(), slength, buf, len);
+	std::wstring r(buf);
+	delete[] buf;
+	return r;
+}
+// Helper function for resolving the full path of assets.
+std::wstring DX12CudaSample::GetAssetFullPath(const char* assetName)
+{
+	LPTSTR lpBuffer = new char[4096];
+	GetCurrentDirectory(FILENAME_MAX, lpBuffer);
+	char *tmp = sdkFindFilePath((const char *)assetName, "simpleD3D12");
+	if (tmp == NULL)
+	{ 
+		throw std::exception("File not found");
+	}
+	for (int i = 0; i < strlen(tmp); i++)
+	{
+		if (tmp[i] == '/')
+		{
+			tmp[i] = '\\';
+		}
+	}
+	m_assetsPath = lpBuffer;
+	m_assetsPath = m_assetsPath + "\\" + tmp;
+
+	std::wstring stemp = string2wstring(m_assetsPath);
+
+	return stemp;
+}
+
+// Helper function for acquiring the first available hardware adapter that supports Direct3D 12.
+// If no such adapter can be found, *ppAdapter will be set to nullptr.
+_Use_decl_annotations_
+void DX12CudaSample::GetHardwareAdapter(IDXGIFactory2* pFactory, IDXGIAdapter1** ppAdapter)
+{
+	ComPtr<IDXGIAdapter1> adapter;
+	*ppAdapter = nullptr;
+
+	for (UINT adapterIndex = 0; DXGI_ERROR_NOT_FOUND != pFactory->EnumAdapters1(adapterIndex, &adapter); ++adapterIndex)
+	{
+		DXGI_ADAPTER_DESC1 desc;
+		adapter->GetDesc1(&desc);
+
+		if (desc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE)
+		{
+			// Don't select the Basic Render Driver adapter.
+			// If you want a software adapter, pass in "/warp" on the command line.
+			continue;
+		}
+
+		// Check to see if the adapter supports Direct3D 12, but don't create the
+		// actual device yet.
+		if (SUCCEEDED(D3D12CreateDevice(adapter.Get(), D3D_FEATURE_LEVEL_11_0, _uuidof(ID3D12Device), nullptr)))
+		{
+			break;
+		}
+	}
+
+	*ppAdapter = adapter.Detach();
+}
+
+// Helper function for setting the window's title text.
+void DX12CudaSample::SetCustomWindowText(const char* text)
+{
+	std::string windowText = m_title + text;
+	SetWindowText(Win32Application::GetHwnd(), windowText.c_str());
+}
+
+// Helper function for parsing any supplied command line args.
+_Use_decl_annotations_
+void DX12CudaSample::ParseCommandLineArgs(WCHAR* argv[], int argc)
+{
+	for (int i = 1; i < argc; ++i)
+	{
+		if (_wcsnicmp(argv[i], L"-warp", wcslen(argv[i])) == 0 || 
+			_wcsnicmp(argv[i], L"/warp", wcslen(argv[i])) == 0)
+		{
+			m_useWarpDevice = true;
+			m_title = m_title + " (WARP)";
+		}
+	}
+}
--- a/Samples/simpleD3D12/DX12CudaSample.h
+++ b/Samples/simpleD3D12/DX12CudaSample.h
@ -0,0 +1,58 @@
+/*
+* Copyright 1993-2018 NVIDIA Corporation.  All rights reserved.
+*
+* Please refer to the NVIDIA end user license agreement (EULA) associated
+* with this source code for terms and conditions that govern your use of
+* this software. Any use, reproduction, disclosure, or distribution of
+* this software and related documentation outside the terms of the EULA
+* is strictly prohibited.
+*
+*/
+
+#pragma once
+
+#include "DXSampleHelper.h"
+#include "Win32Application.h"
+
+class DX12CudaSample
+{
+public:
+	DX12CudaSample(UINT width, UINT height, std::string name);
+	virtual ~DX12CudaSample();
+
+	virtual void OnInit() = 0;
+	virtual void OnRender() = 0;
+	virtual void OnDestroy() = 0;
+
+	// Samples override the event handlers to handle specific messages.
+	virtual void OnKeyDown(UINT8 /*key*/)   {}
+	virtual void OnKeyUp(UINT8 /*key*/)     {}
+
+	// Accessors.
+	UINT GetWidth() const           { return m_width; }
+	UINT GetHeight() const          { return m_height; }
+	const CHAR* GetTitle() const   { return m_title.c_str(); }
+
+	void ParseCommandLineArgs(_In_reads_(argc) WCHAR* argv[], int argc);
+
+protected:
+	std::wstring GetAssetFullPath(const char* assetName);
+	void GetHardwareAdapter(_In_ IDXGIFactory2* pFactory, _Outptr_result_maybenull_ IDXGIAdapter1** ppAdapter);
+	void SetCustomWindowText(const char* text);
+	std::wstring string2wstring(const std::string& s);
+
+	// Viewport dimensions.
+	UINT m_width;
+	UINT m_height;
+	float m_aspectRatio;
+
+	// Adapter info.
+	bool m_useWarpDevice;
+
+private:
+	// Root assets path.
+	std::string m_assetsPath;
+
+	// Window title.
+	std::string m_title;
+};
--- a/Samples/simpleD3D12/DXSampleHelper.h
+++ b/Samples/simpleD3D12/DXSampleHelper.h
@ -0,0 +1,171 @@
+/*
+* Copyright 1993-2018 NVIDIA Corporation.  All rights reserved.
+*
+* Please refer to the NVIDIA end user license agreement (EULA) associated
+* with this source code for terms and conditions that govern your use of
+* this software. Any use, reproduction, disclosure, or distribution of
+* this software and related documentation outside the terms of the EULA
+* is strictly prohibited.
+*
+*/
+
+#pragma once
+
+
+// Note that while ComPtr is used to manage the lifetime of resources on the CPU,
+// it has no understanding of the lifetime of resources on the GPU. Apps must account
+// for the GPU lifetime of resources to avoid destroying objects that may still be
+// referenced by the GPU.
+using Microsoft::WRL::ComPtr;
+
+
+inline std::string HrToString(HRESULT hr)
+{
+    char s_str[64] = {};
+    sprintf_s(s_str, "HRESULT of 0x%08X", static_cast<UINT>(hr));
+    return std::string(s_str);
+}
+
+class HrException : public std::runtime_error
+{
+public:
+    HrException(HRESULT hr) : std::runtime_error(HrToString(hr)), m_hr(hr) {}
+    HRESULT Error() const { return m_hr; }
+private:
+    const HRESULT m_hr;
+};
+
+#define SAFE_RELEASE(p) if (p) (p)->Release()
+
+inline void ThrowIfFailed(HRESULT hr)
+{
+    if (FAILED(hr))
+    {
+        throw HrException(hr);
+    }
+}
+
+inline HRESULT ReadDataFromFile(LPCWSTR filename, byte** data, UINT* size)
+{
+    using namespace Microsoft::WRL;
+
+    CREATEFILE2_EXTENDED_PARAMETERS extendedParams = {};
+    extendedParams.dwSize = sizeof(CREATEFILE2_EXTENDED_PARAMETERS);
+    extendedParams.dwFileAttributes = FILE_ATTRIBUTE_NORMAL;
+    extendedParams.dwFileFlags = FILE_FLAG_SEQUENTIAL_SCAN;
+    extendedParams.dwSecurityQosFlags = SECURITY_ANONYMOUS;
+    extendedParams.lpSecurityAttributes = nullptr;
+    extendedParams.hTemplateFile = nullptr;
+
+    Wrappers::FileHandle file(CreateFile2(filename, GENERIC_READ, FILE_SHARE_READ, OPEN_EXISTING, &extendedParams));
+    if (file.Get() == INVALID_HANDLE_VALUE)
+    {
+        throw std::exception();
+    }
+
+    FILE_STANDARD_INFO fileInfo = {};
+    if (!GetFileInformationByHandleEx(file.Get(), FileStandardInfo, &fileInfo, sizeof(fileInfo)))
+    {
+        throw std::exception();
+    }
+
+    if (fileInfo.EndOfFile.HighPart != 0)
+    {
+        throw std::exception();
+    }
+
+    *data = reinterpret_cast<byte*>(malloc(fileInfo.EndOfFile.LowPart));
+    *size = fileInfo.EndOfFile.LowPart;
+
+    if (!ReadFile(file.Get(), *data, fileInfo.EndOfFile.LowPart, nullptr, nullptr))
+    {
+        throw std::exception();
+    }
+
+    return S_OK;
+}
+
+// Assign a name to the object to aid with debugging.
+#if defined(_DEBUG) || defined(DBG)
+inline void SetName(ID3D12Object* pObject, LPCWSTR name)
+{
+    pObject->SetName(name);
+}
+inline void SetNameIndexed(ID3D12Object* pObject, LPCWSTR name, UINT index)
+{
+    WCHAR fullName[50];
+    if (swprintf_s(fullName, L"%s[%u]", name, index) > 0)
+    {
+        pObject->SetName(fullName);
+    }
+}
+#else
+inline void SetName(ID3D12Object*, LPCWSTR)
+{
+}
+inline void SetNameIndexed(ID3D12Object*, LPCWSTR, UINT)
+{
+}
+#endif
+
+// Naming helper for ComPtr<T>.
+// Assigns the name of the variable as the name of the object.
+// The indexed variant will include the index in the name of the object.
+#define NAME_D3D12_OBJECT(x) SetName((x).Get(), L#x)
+#define NAME_D3D12_OBJECT_INDEXED(x, n) SetNameIndexed((x)[n].Get(), L#x, n)
+
+inline UINT CalculateConstantBufferByteSize(UINT byteSize)
+{
+    // Constant buffer size is required to be aligned.
+    return (byteSize + (D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT - 1)) & ~(D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT - 1);
+}
+
+#ifdef D3D_COMPILE_STANDARD_FILE_INCLUDE
+inline Microsoft::WRL::ComPtr<ID3DBlob> CompileShader(
+    const std::wstring& filename,
+    const D3D_SHADER_MACRO* defines,
+    const std::string& entrypoint,
+    const std::string& target)
+{
+    UINT compileFlags = 0;
+#if defined(_DEBUG) || defined(DBG)
+    compileFlags = D3DCOMPILE_DEBUG | D3DCOMPILE_SKIP_OPTIMIZATION;
+#endif
+
+    HRESULT hr;
+
+    Microsoft::WRL::ComPtr<ID3DBlob> byteCode = nullptr;
+    Microsoft::WRL::ComPtr<ID3DBlob> errors;
+    hr = D3DCompileFromFile(filename.c_str(), defines, D3D_COMPILE_STANDARD_FILE_INCLUDE,
+        entrypoint.c_str(), target.c_str(), compileFlags, 0, &byteCode, &errors);
+
+    if (errors != nullptr)
+    {
+        OutputDebugStringA((char*)errors->GetBufferPointer());
+    }
+    ThrowIfFailed(hr);
+
+    return byteCode;
+}
+#endif
+
+// Resets all elements in a ComPtr array.
+template<class T>
+void ResetComPtrArray(T* comPtrArray)
+{
+    for (auto &i : *comPtrArray)
+    {
+        i.Reset();
+    }
+}
+
+
+// Resets all elements in a unique_ptr array.
+template<class T>
+void ResetUniquePtrArray(T* uniquePtrArray)
+{
+    for (auto &i : *uniquePtrArray)
+    {
+        i.reset();
+    }
+}
--- a/Samples/simpleD3D12/Main.cpp
+++ b/Samples/simpleD3D12/Main.cpp
@ -0,0 +1,20 @@
+/*
+* Copyright 1993-2018 NVIDIA Corporation.  All rights reserved.
+*
+* Please refer to the NVIDIA end user license agreement (EULA) associated
+* with this source code for terms and conditions that govern your use of
+* this software. Any use, reproduction, disclosure, or distribution of
+* this software and related documentation outside the terms of the EULA
+* is strictly prohibited.
+*
+*/
+
+#include "stdafx.h"
+#include "simpleD3D12.h"
+
+_Use_decl_annotations_
+int WINAPI WinMain(HINSTANCE hInstance, HINSTANCE, LPSTR, int nCmdShow)
+{
+	DX12CudaInterop sample(1280, 720, "D3D12 CUDA Interop");
+	return Win32Application::Run(&sample, hInstance, nCmdShow);
+}
--- a/Samples/simpleD3D12/NsightEclipse.xml
+++ b/Samples/simpleD3D12/NsightEclipse.xml
@ -0,0 +1,62 @@
+<?xml version="1.0" encoding="UTF-8"?> 
+<!DOCTYPE entry SYSTEM "SamplesInfo.dtd">
+<entry>
+  <name>simpleD3D12</name>
+  <cuda_api_list>
+    <toolkit>cudaWaitExternalSemaphoresAsync</toolkit>
+    <toolkit>cudaSignalExternalSemaphoresAsync</toolkit>
+    <toolkit>cudaImportExternalSemaphore</toolkit>
+    <toolkit>cudaExternalMemoryGetMappedBuffer</toolkit>
+    <toolkit>cudaImportExternalMemory</toolkit>
+    <toolkit>cudaDestroyExternalSemaphore</toolkit>
+    <toolkit>cudaDestroyExternalMemory</toolkit>
+  </cuda_api_list>
+  <description><![CDATA[A program which demonstrates Direct3D12 interoperability with CUDA.  The program creates a sinewave in DX12 vertex buffer which is created using CUDA kernels. DX12 and CUDA synchronizes using DirectX12 Fences. Direct3D then renders the results on the screen.  A DirectX12 Capable NVIDIA GPU is required on Windows10 or higher OS.]]></description>
+  <devicecompilation>whole</devicecompilation>
+  <includepaths>
+    <path>./</path>
+    <path>../</path>
+    <path>../../common/inc</path>
+  </includepaths>
+  <keyconcepts>
+    <concept level="basic">Graphics Interop</concept>
+    <concept level="advanced">CUDA DX12 Interop</concept>
+    <concept level="basic">Image Processing</concept>
+  </keyconcepts>
+  <keywords>
+    <keyword>D3D</keyword>
+    <keyword>d3d12</keyword>
+  </keywords>
+  <libraries>
+  </libraries>
+  <librarypaths>
+  </librarypaths>
+  <nsight_eclipse>true</nsight_eclipse>
+  <primary_file>simpleD3D12.cpp</primary_file>
+  <required_dependencies>
+    <dependency>DirectX12</dependency>
+  </required_dependencies>
+  <scopes>
+    <scope>1:CUDA Basic Topics</scope>
+    <scope>2:Graphics Interop</scope>
+  </scopes>
+  <sm-arch>sm30</sm-arch>
+  <sm-arch>sm35</sm-arch>
+  <sm-arch>sm37</sm-arch>
+  <sm-arch>sm50</sm-arch>
+  <sm-arch>sm52</sm-arch>
+  <sm-arch>sm60</sm-arch>
+  <sm-arch>sm61</sm-arch>
+  <sm-arch>sm70</sm-arch>
+  <sm-arch>sm75</sm-arch>
+  <supported_envs>
+    <env>
+      <platform>windows10</platform>
+    </env>
+  </supported_envs>
+  <supported_sm_architectures>
+    <include>all</include>
+  </supported_sm_architectures>
+  <title>Simple D3D12 CUDA Interop</title>
+  <type>exe</type>
+</entry>
--- a/Samples/simpleD3D12/README.md
+++ b/Samples/simpleD3D12/README.md
@ -0,0 +1,49 @@
+# simpleD3D12 - Simple D3D12 CUDA Interop
+
+## Description
+
+A program which demonstrates Direct3D12 interoperability with CUDA.  The program creates a sinewave in DX12 vertex buffer which is created using CUDA kernels. DX12 and CUDA synchronizes using DirectX12 Fences. Direct3D then renders the results on the screen.  A DirectX12 Capable NVIDIA GPU is required on Windows10 or higher OS.
+
+## Key Concepts
+
+Graphics Interop, CUDA DX12 Interop, Image Processing
+
+## Supported SM Architectures
+
+[SM 3.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)
+
+## Supported OSes
+
+Windows
+
+## Supported CPU Architecture
+
+x86_64
+
+## CUDA APIs involved
+
+### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
+cudaWaitExternalSemaphoresAsync, cudaSignalExternalSemaphoresAsync, cudaImportExternalSemaphore, cudaExternalMemoryGetMappedBuffer, cudaImportExternalMemory, cudaDestroyExternalSemaphore, cudaDestroyExternalMemory
+
+## Dependencies needed to build/run
+[DirectX12](../../README.md#directx12)
+
+## Prerequisites
+
+Download and install the [CUDA Toolkit 10.0](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Make sure the dependencies mentioned in [Dependencies]() section above are installed.
+
+## Build and Run
+
+### Windows
+The Windows samples are built using the Visual Studio IDE. Solution files (.sln) are provided for each supported version of Visual Studio, using the format:
+```
+*_vs<version>.sln - for Visual Studio <version>
+```
+Each individual sample has its own set of solution files in its directory:
+
+To build/examine all the samples at once, the complete solution files should be used. To build/examine a single sample, the individual sample solution files should be used.
+> **Note:** Some samples require that the Microsoft DirectX SDK (June 2010 or newer) be installed and that the VC++ directory paths are properly set up (**Tools > Options...**). Check DirectX Dependencies section for details."
+
+## References (for more details)
+
--- a/Samples/simpleD3D12/ShaderStructs.h
+++ b/Samples/simpleD3D12/ShaderStructs.h
@ -0,0 +1,36 @@
+/*
+* Copyright 1993-2018 NVIDIA Corporation.  All rights reserved.
+*
+* Please refer to the NVIDIA end user license agreement (EULA) associated
+* with this source code for terms and conditions that govern your use of
+* this software. Any use, reproduction, disclosure, or distribution of
+* this software and related documentation outside the terms of the EULA
+* is strictly prohibited.
+*
+*/
+
+#pragma once
+
+#include "stdafx.h"
+#include <cuda_runtime.h>
+#include "helper_cuda.h"
+
+using namespace DirectX;
+
+struct Vertex
+{
+	XMFLOAT3 position;
+	XMFLOAT4 color;
+};
+
+#if 0
+// Constant buffer used to send MVP matrices to the vertex shader.
+struct ModelViewProjectionConstantBuffer
+{
+	XMFLOAT4X4 model;
+	XMFLOAT4X4 view;
+	XMFLOAT4X4 projection;
+};
+#endif
+
+void RunSineWaveKernel(size_t mesh_width, size_t mesh_height, Vertex *cudaDevVertptr, cudaStream_t streamToRun, float AnimTime);
--- a/Samples/simpleD3D12/Win32Application.cpp
+++ b/Samples/simpleD3D12/Win32Application.cpp
@ -0,0 +1,118 @@
+/*
+* Copyright 1993-2018 NVIDIA Corporation.  All rights reserved.
+*
+* Please refer to the NVIDIA end user license agreement (EULA) associated
+* with this source code for terms and conditions that govern your use of
+* this software. Any use, reproduction, disclosure, or distribution of
+* this software and related documentation outside the terms of the EULA
+* is strictly prohibited.
+*
+*/
+
+#include "stdafx.h"
+#include "Win32Application.h"
+
+HWND Win32Application::m_hwnd = nullptr;
+
+int Win32Application::Run(DX12CudaSample* pSample, HINSTANCE hInstance, int nCmdShow)
+{
+	// Parse the command line parameters
+	int argc;
+	LPWSTR* argv = CommandLineToArgvW(GetCommandLineW(), &argc);
+	pSample->ParseCommandLineArgs(argv, argc);
+	LocalFree(argv);
+
+	// Initialize the window class.
+	WNDCLASSEX windowClass = { 0 };
+	windowClass.cbSize = sizeof(WNDCLASSEX);
+	windowClass.style = CS_HREDRAW | CS_VREDRAW;
+	windowClass.lpfnWndProc = WindowProc;
+	windowClass.hInstance = hInstance;
+	windowClass.hCursor = LoadCursor(NULL, IDC_ARROW);
+	windowClass.lpszClassName = "DX12CudaSampleClass";
+	RegisterClassEx(&windowClass);
+
+	RECT windowRect = { 0, 0, static_cast<LONG>(pSample->GetWidth()), static_cast<LONG>(pSample->GetHeight()) };
+	AdjustWindowRect(&windowRect, WS_OVERLAPPEDWINDOW, FALSE);
+
+	// Create the window and store a handle to it.
+	m_hwnd = CreateWindow(
+		windowClass.lpszClassName,
+		pSample->GetTitle(),
+		WS_OVERLAPPEDWINDOW,
+		CW_USEDEFAULT,
+		CW_USEDEFAULT,
+		windowRect.right - windowRect.left,
+		windowRect.bottom - windowRect.top,
+		nullptr,		// We have no parent window.
+		nullptr,		// We aren't using menus.
+		hInstance,
+		pSample);
+
+	// Initialize the sample. OnInit is defined in each child-implementation of DXSample.
+	pSample->OnInit();
+
+	ShowWindow(m_hwnd, nCmdShow);
+
+	// Main sample loop.
+	MSG msg = {};
+	while (msg.message != WM_QUIT)
+	{
+		// Process any messages in the queue.
+		if (PeekMessage(&msg, NULL, 0, 0, PM_REMOVE))
+		{
+			TranslateMessage(&msg);
+			DispatchMessage(&msg);
+		}
+	}
+
+	pSample->OnDestroy();
+
+	// Return this part of the WM_QUIT message to Windows.
+	return static_cast<char>(msg.wParam);
+}
+
+// Main message handler for the sample.
+LRESULT CALLBACK Win32Application::WindowProc(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam)
+{
+	DX12CudaSample* pSample = reinterpret_cast<DX12CudaSample*>(GetWindowLongPtr(hWnd, GWLP_USERDATA));
+
+	switch (message)
+	{
+	case WM_CREATE:
+		{
+			// Save the DXSample* passed in to CreateWindow.
+			LPCREATESTRUCT pCreateStruct = reinterpret_cast<LPCREATESTRUCT>(lParam);
+			SetWindowLongPtr(hWnd, GWLP_USERDATA, reinterpret_cast<LONG_PTR>(pCreateStruct->lpCreateParams));
+		}
+		return 0;
+
+	case WM_KEYDOWN:
+		if (pSample)
+		{
+			pSample->OnKeyDown(static_cast<UINT8>(wParam));
+		}
+		return 0;
+
+	case WM_KEYUP:
+		if (pSample)
+		{
+			pSample->OnKeyUp(static_cast<UINT8>(wParam));
+		}
+		return 0;
+
+	case WM_PAINT:
+		if (pSample)
+		{
+			pSample->OnRender();
+		}
+		return 0;
+
+	case WM_DESTROY:
+		PostQuitMessage(0);
+		return 0;
+	}
+
+	// Handle any messages the switch statement didn't.
+	return DefWindowProc(hWnd, message, wParam, lParam);
+}
--- a/Samples/simpleD3D12/Win32Application.h
+++ b/Samples/simpleD3D12/Win32Application.h
@ -0,0 +1,29 @@
+/*
+* Copyright 1993-2018 NVIDIA Corporation.  All rights reserved.
+*
+* Please refer to the NVIDIA end user license agreement (EULA) associated
+* with this source code for terms and conditions that govern your use of
+* this software. Any use, reproduction, disclosure, or distribution of
+* this software and related documentation outside the terms of the EULA
+* is strictly prohibited.
+*
+*/
+
+#pragma once
+
+#include "DX12CudaSample.h"
+
+class DX12CudaSample;
+
+class Win32Application
+{
+public:
+	static int Run(DX12CudaSample* pSample, HINSTANCE hInstance, int nCmdShow);
+	static HWND GetHwnd() { return m_hwnd; }
+
+protected:
+	static LRESULT CALLBACK WindowProc(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam);
+
+private:
+	static HWND m_hwnd;
+};
--- a/Samples/simpleD3D12/d3dx12.h
+++ b/Samples/simpleD3D12/d3dx12.h
--- a/Samples/simpleD3D12/shaders.hlsl
+++ b/Samples/simpleD3D12/shaders.hlsl
@ -0,0 +1,34 @@
+/*
+* Copyright 1993-2018 NVIDIA Corporation.  All rights reserved.
+*
+* Please refer to the NVIDIA end user license agreement (EULA) associated
+* with this source code for terms and conditions that govern your use of
+* this software. Any use, reproduction, disclosure, or distribution of
+* this software and related documentation outside the terms of the EULA
+* is strictly prohibited.
+*
+*/
+
+
+struct PSInput
+{
+	float4 position : SV_POSITION;
+	float4 color : COLOR;
+};
+
+PSInput VSMain(float3 position : POSITION, float4 color : COLOR)
+{
+	PSInput result;
+
+	result.position = float4(position, 1.0f);
+
+	// Pass the color through without modification.
+	result.color = color;
+
+	return result;
+}
+
+float4 PSMain(PSInput input) : SV_TARGET
+{
+	return input.color;
+}
--- a/Samples/simpleD3D12/simpleD3D12.cpp
+++ b/Samples/simpleD3D12/simpleD3D12.cpp
@ -0,0 +1,509 @@
+/*
+* Copyright 1993-2018 NVIDIA Corporation.  All rights reserved.
+*
+* Please refer to the NVIDIA end user license agreement (EULA) associated
+* with this source code for terms and conditions that govern your use of
+* this software. Any use, reproduction, disclosure, or distribution of
+* this software and related documentation outside the terms of the EULA
+* is strictly prohibited.
+*
+*/
+
+
+#include "stdafx.h"
+#include "simpleD3D12.h"
+#include <aclapi.h>
+
+//////////////////////////////////////////////
+// WindowsSecurityAttributes implementation //
+//////////////////////////////////////////////
+
+class WindowsSecurityAttributes {
+protected:
+	SECURITY_ATTRIBUTES m_winSecurityAttributes;
+	PSECURITY_DESCRIPTOR m_winPSecurityDescriptor;
+
+public:
+	WindowsSecurityAttributes();
+	~WindowsSecurityAttributes();
+	SECURITY_ATTRIBUTES * operator&();
+};
+
+WindowsSecurityAttributes::WindowsSecurityAttributes()
+{
+	m_winPSecurityDescriptor = (PSECURITY_DESCRIPTOR)calloc(1, SECURITY_DESCRIPTOR_MIN_LENGTH + 2 * sizeof(void**));
+	assert(m_winPSecurityDescriptor != (PSECURITY_DESCRIPTOR)NULL);
+
+	PSID *ppSID = (PSID *)((PBYTE)m_winPSecurityDescriptor + SECURITY_DESCRIPTOR_MIN_LENGTH);
+	PACL *ppACL = (PACL *)((PBYTE)ppSID + sizeof(PSID *));
+
+	InitializeSecurityDescriptor(m_winPSecurityDescriptor, SECURITY_DESCRIPTOR_REVISION);
+
+	SID_IDENTIFIER_AUTHORITY sidIdentifierAuthority = SECURITY_WORLD_SID_AUTHORITY;
+	AllocateAndInitializeSid(&sidIdentifierAuthority, 1, SECURITY_WORLD_RID, 0, 0, 0, 0, 0, 0, 0, ppSID);
+
+	EXPLICIT_ACCESS explicitAccess;
+	ZeroMemory(&explicitAccess, sizeof(EXPLICIT_ACCESS));
+	explicitAccess.grfAccessPermissions = STANDARD_RIGHTS_ALL | SPECIFIC_RIGHTS_ALL;
+	explicitAccess.grfAccessMode = SET_ACCESS;
+	explicitAccess.grfInheritance = INHERIT_ONLY;
+	explicitAccess.Trustee.TrusteeForm = TRUSTEE_IS_SID;
+	explicitAccess.Trustee.TrusteeType = TRUSTEE_IS_WELL_KNOWN_GROUP;
+	explicitAccess.Trustee.ptstrName = (LPTSTR)*ppSID;
+
+	SetEntriesInAcl(1, &explicitAccess, NULL, ppACL);
+
+	SetSecurityDescriptorDacl(m_winPSecurityDescriptor, TRUE, *ppACL, FALSE);
+
+	m_winSecurityAttributes.nLength = sizeof(m_winSecurityAttributes);
+	m_winSecurityAttributes.lpSecurityDescriptor = m_winPSecurityDescriptor;
+	m_winSecurityAttributes.bInheritHandle = TRUE;
+}
+
+WindowsSecurityAttributes::~WindowsSecurityAttributes()
+{
+	PSID* ppSID = (PSID*)((PBYTE)m_winPSecurityDescriptor + SECURITY_DESCRIPTOR_MIN_LENGTH);
+	PACL* ppACL = (PACL*)((PBYTE)ppSID + sizeof(PSID*));
+
+	if (*ppSID) {
+		FreeSid(*ppSID);
+	}
+	if (*ppACL) {
+		LocalFree(*ppACL);
+	}
+	free(m_winPSecurityDescriptor);
+}
+
+SECURITY_ATTRIBUTES *
+WindowsSecurityAttributes::operator&()
+{
+	return &m_winSecurityAttributes;
+}
+
+DX12CudaInterop::DX12CudaInterop(UINT width, UINT height, std::string name) :
+	DX12CudaSample(width, height, name),
+	m_frameIndex(0),
+	m_scissorRect(0, 0, static_cast<LONG>(width), static_cast<LONG>(height)),
+	m_fenceValues{},
+	m_rtvDescriptorSize(0)
+{
+	m_viewport = { 0.0f, 0.0f, static_cast<float>(width), static_cast<float>(height) };
+	m_AnimTime = 1.0f;
+}
+
+void DX12CudaInterop::OnInit()
+{
+	LoadPipeline();
+	InitCuda();
+	LoadAssets();
+}
+
+// Load the rendering pipeline dependencies.
+void DX12CudaInterop::LoadPipeline()
+{
+	UINT dxgiFactoryFlags = 0;
+
+#if defined(_DEBUG)
+	// Enable the debug layer (requires the Graphics Tools "optional feature").
+	// NOTE: Enabling the debug layer after device creation will invalidate the active device.
+	{
+		ComPtr<ID3D12Debug> debugController;
+		if (SUCCEEDED(D3D12GetDebugInterface(IID_PPV_ARGS(&debugController))))
+		{
+			debugController->EnableDebugLayer();
+
+			// Enable additional debug layers.
+			dxgiFactoryFlags |= DXGI_CREATE_FACTORY_DEBUG;
+		}
+	}
+#endif
+
+	ComPtr<IDXGIFactory4> factory;
+	ThrowIfFailed(CreateDXGIFactory2(dxgiFactoryFlags, IID_PPV_ARGS(&factory)));
+
+	if (m_useWarpDevice)
+	{
+		ComPtr<IDXGIAdapter> warpAdapter;
+		ThrowIfFailed(factory->EnumWarpAdapter(IID_PPV_ARGS(&warpAdapter)));
+
+		ThrowIfFailed(D3D12CreateDevice(
+			warpAdapter.Get(),
+			D3D_FEATURE_LEVEL_11_0,
+			IID_PPV_ARGS(&m_device)
+			));
+	}
+	else
+	{
+		ComPtr<IDXGIAdapter1> hardwareAdapter;
+		GetHardwareAdapter(factory.Get(), &hardwareAdapter);
+
+		ThrowIfFailed(D3D12CreateDevice(
+			hardwareAdapter.Get(),
+			D3D_FEATURE_LEVEL_11_0,
+			IID_PPV_ARGS(&m_device)
+			));
+		DXGI_ADAPTER_DESC1 desc;
+		hardwareAdapter->GetDesc1(&desc);
+		m_dx12deviceluid = desc.AdapterLuid;
+	}
+
+	// Describe and create the command queue.
+	D3D12_COMMAND_QUEUE_DESC queueDesc = {};
+	queueDesc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE;
+	queueDesc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT;
+
+	ThrowIfFailed(m_device->CreateCommandQueue(&queueDesc, IID_PPV_ARGS(&m_commandQueue)));
+
+	// Describe and create the swap chain.
+	DXGI_SWAP_CHAIN_DESC1 swapChainDesc = {};
+	swapChainDesc.BufferCount = FrameCount;
+	swapChainDesc.Width = m_width;
+	swapChainDesc.Height = m_height;
+	swapChainDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
+	swapChainDesc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT;
+	swapChainDesc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD;
+	swapChainDesc.SampleDesc.Count = 1;
+
+	ComPtr<IDXGISwapChain1> swapChain;
+	ThrowIfFailed(factory->CreateSwapChainForHwnd(
+		m_commandQueue.Get(),		// Swap chain needs the queue so that it can force a flush on it.
+		Win32Application::GetHwnd(),
+		&swapChainDesc,
+		nullptr,
+		nullptr,
+		&swapChain
+		));
+
+	// This sample does not support fullscreen transitions.
+	ThrowIfFailed(factory->MakeWindowAssociation(Win32Application::GetHwnd(), DXGI_MWA_NO_ALT_ENTER));
+
+	ThrowIfFailed(swapChain.As(&m_swapChain));
+	m_frameIndex = m_swapChain->GetCurrentBackBufferIndex();
+
+	// Create descriptor heaps.
+	{
+		// Describe and create a render target view (RTV) descriptor heap.
+		D3D12_DESCRIPTOR_HEAP_DESC rtvHeapDesc = {};
+		rtvHeapDesc.NumDescriptors = FrameCount;
+		rtvHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_RTV;
+		rtvHeapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE;
+		ThrowIfFailed(m_device->CreateDescriptorHeap(&rtvHeapDesc, IID_PPV_ARGS(&m_rtvHeap)));
+
+		m_rtvDescriptorSize = m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_RTV);
+	}
+
+	// Create frame resources.
+	{
+		CD3DX12_CPU_DESCRIPTOR_HANDLE rtvHandle(m_rtvHeap->GetCPUDescriptorHandleForHeapStart());
+
+		// Create a RTV and a command allocator for each frame.
+		for (UINT n = 0; n < FrameCount; n++)
+		{
+			ThrowIfFailed(m_swapChain->GetBuffer(n, IID_PPV_ARGS(&m_renderTargets[n])));
+			m_device->CreateRenderTargetView(m_renderTargets[n].Get(), nullptr, rtvHandle);
+			rtvHandle.Offset(1, m_rtvDescriptorSize);
+
+			ThrowIfFailed(m_device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&m_commandAllocators[n])));
+		}
+	}
+}
+
+void DX12CudaInterop::InitCuda()
+{
+	int num_cuda_devices = 0;
+	checkCudaErrors(cudaGetDeviceCount(&num_cuda_devices));
+
+	if (!num_cuda_devices)
+	{
+		throw std::exception("No CUDA Devices found");
+	}
+	for (UINT devId = 0; devId < num_cuda_devices; devId++)
+	{
+		cudaDeviceProp devProp;
+		checkCudaErrors(cudaGetDeviceProperties(&devProp, devId));
+
+		if ((memcmp(&m_dx12deviceluid.LowPart, devProp.luid, sizeof(m_dx12deviceluid.LowPart)) == 0) && (memcmp(&m_dx12deviceluid.HighPart, devProp.luid + sizeof(m_dx12deviceluid.LowPart), sizeof(m_dx12deviceluid.HighPart)) == 0))
+        {
+			checkCudaErrors(cudaSetDevice(devId));
+			m_cudaDeviceID = devId;
+			m_nodeMask = devProp.luidDeviceNodeMask;
+			checkCudaErrors(cudaStreamCreate(&m_streamToRun));
+			printf("CUDA Device Used [%d] %s\n", devId, devProp.name);
+			break;
+		}
+	}
+}
+// Load the sample assets.
+void DX12CudaInterop::LoadAssets()
+{
+	// Create a root signature.
+	{
+		CD3DX12_DESCRIPTOR_RANGE range;
+		CD3DX12_ROOT_PARAMETER parameter;
+
+		range.Init(D3D12_DESCRIPTOR_RANGE_TYPE_CBV, 1, 0);
+		parameter.InitAsDescriptorTable(1, &range, D3D12_SHADER_VISIBILITY_VERTEX);
+
+		D3D12_ROOT_SIGNATURE_FLAGS rootSignatureFlags =
+			D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT | // Only the input assembler stage needs access to the constant buffer.
+			D3D12_ROOT_SIGNATURE_FLAG_DENY_DOMAIN_SHADER_ROOT_ACCESS |
+			D3D12_ROOT_SIGNATURE_FLAG_DENY_GEOMETRY_SHADER_ROOT_ACCESS |
+			D3D12_ROOT_SIGNATURE_FLAG_DENY_HULL_SHADER_ROOT_ACCESS |
+			D3D12_ROOT_SIGNATURE_FLAG_DENY_PIXEL_SHADER_ROOT_ACCESS;
+
+		CD3DX12_ROOT_SIGNATURE_DESC descRootSignature;
+		descRootSignature.Init(1, &parameter, 0, nullptr, rootSignatureFlags);
+		ComPtr<ID3DBlob> pSignature;
+		ComPtr<ID3DBlob> pError;
+		ThrowIfFailed(D3D12SerializeRootSignature(&descRootSignature, D3D_ROOT_SIGNATURE_VERSION_1, pSignature.GetAddressOf(), pError.GetAddressOf()));
+		ThrowIfFailed(m_device->CreateRootSignature(0, pSignature->GetBufferPointer(), pSignature->GetBufferSize(), IID_PPV_ARGS(&m_rootSignature)));
+	}
+	// Create the pipeline state, which includes compiling and loading shaders.
+	{
+		ComPtr<ID3DBlob> vertexShader;
+		ComPtr<ID3DBlob> pixelShader;
+
+#if defined(_DEBUG)
+		// Enable better shader debugging with the graphics debugging tools.
+		UINT compileFlags = D3DCOMPILE_DEBUG | D3DCOMPILE_SKIP_OPTIMIZATION;
+#else
+		UINT compileFlags = 0;
+#endif
+		std::wstring filePath = GetAssetFullPath("shaders.hlsl");
+		LPCWSTR result = filePath.c_str();
+		ThrowIfFailed(D3DCompileFromFile(result, nullptr, nullptr, "VSMain", "vs_5_0", compileFlags, 0, &vertexShader, nullptr));
+		ThrowIfFailed(D3DCompileFromFile(result, nullptr, nullptr, "PSMain", "ps_5_0", compileFlags, 0, &pixelShader, nullptr));
+
+		// Define the vertex input layout.
+		D3D12_INPUT_ELEMENT_DESC inputElementDescs[] =
+		{
+			{ "POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 0, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 },
+			{ "COLOR", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, 12, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 }
+		};
+
+		// Describe and create the graphics pipeline state object (PSO).
+		D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = {};
+		psoDesc.InputLayout = { inputElementDescs, _countof(inputElementDescs) };
+		psoDesc.pRootSignature = m_rootSignature.Get();
+		psoDesc.VS = CD3DX12_SHADER_BYTECODE(vertexShader.Get());
+		psoDesc.PS = CD3DX12_SHADER_BYTECODE(pixelShader.Get());
+		psoDesc.RasterizerState = CD3DX12_RASTERIZER_DESC(D3D12_DEFAULT);
+		psoDesc.BlendState = CD3DX12_BLEND_DESC(D3D12_DEFAULT);
+		psoDesc.DepthStencilState = CD3DX12_DEPTH_STENCIL_DESC(D3D12_DEFAULT);
+		psoDesc.SampleMask = UINT_MAX;
+		psoDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT;
+		psoDesc.NumRenderTargets = 1;
+		psoDesc.RTVFormats[0] = DXGI_FORMAT_R8G8B8A8_UNORM;
+		psoDesc.SampleDesc.Count = 1;
+		ThrowIfFailed(m_device->CreateGraphicsPipelineState(&psoDesc, IID_PPV_ARGS(&m_pipelineState)));
+	}
+
+	// Create the command list.
+	ThrowIfFailed(m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, m_commandAllocators[m_frameIndex].Get(), m_pipelineState.Get(), IID_PPV_ARGS(&m_commandList)));
+
+	// Command lists are created in the recording state, but there is nothing
+	// to record yet. The main loop expects it to be closed, so close it now.
+	ThrowIfFailed(m_commandList->Close());
+
+	// Create the vertex buffer.
+	{
+		// Define the geometry for a triangle.
+		vertBufWidth = m_width/2;
+		vertBufHeight = m_height/2;
+		const UINT vertexBufferSize = sizeof(Vertex)*vertBufWidth*vertBufHeight;
+
+		ThrowIfFailed(m_device->CreateCommittedResource(
+			&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT),
+			D3D12_HEAP_FLAG_SHARED,
+			&CD3DX12_RESOURCE_DESC::Buffer(vertexBufferSize),
+			D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER,
+			nullptr,
+			IID_PPV_ARGS(&m_vertexBuffer)));
+
+		// Initialize the vertex buffer view.
+		m_vertexBufferView.BufferLocation = m_vertexBuffer->GetGPUVirtualAddress();
+		m_vertexBufferView.StrideInBytes = sizeof(Vertex);
+		m_vertexBufferView.SizeInBytes = vertexBufferSize;
+
+		HANDLE sharedHandle;
+		WindowsSecurityAttributes windowsSecurityAttributes;
+		LPCWSTR name = NULL;
+		ThrowIfFailed(m_device->CreateSharedHandle(m_vertexBuffer.Get(), &windowsSecurityAttributes, GENERIC_ALL, name, &sharedHandle));
+
+		D3D12_RESOURCE_ALLOCATION_INFO d3d12ResourceAllocationInfo;
+		d3d12ResourceAllocationInfo = m_device->GetResourceAllocationInfo(m_nodeMask, 1, &CD3DX12_RESOURCE_DESC::Buffer(vertexBufferSize));
+		size_t actualSize = d3d12ResourceAllocationInfo.SizeInBytes;
+		size_t alignment  = d3d12ResourceAllocationInfo.Alignment;
+
+		cudaExternalMemoryHandleDesc externalMemoryHandleDesc;
+        memset(&externalMemoryHandleDesc, 0, sizeof(externalMemoryHandleDesc));
+
+		externalMemoryHandleDesc.type = cudaExternalMemoryHandleTypeD3D12Resource;
+		externalMemoryHandleDesc.handle.win32.handle = sharedHandle;
+		externalMemoryHandleDesc.size = actualSize;
+		externalMemoryHandleDesc.flags = cudaExternalMemoryDedicated;
+
+		checkCudaErrors(cudaImportExternalMemory(&m_externalMemory, &externalMemoryHandleDesc));
+
+		cudaExternalMemoryBufferDesc externalMemoryBufferDesc;
+		memset(&externalMemoryBufferDesc, 0, sizeof(externalMemoryBufferDesc));
+		externalMemoryBufferDesc.offset = 0;
+		externalMemoryBufferDesc.size   = vertexBufferSize;
+		externalMemoryBufferDesc.flags  = 0;
+
+		checkCudaErrors(cudaExternalMemoryGetMappedBuffer(&m_cudaDevVertptr, m_externalMemory, &externalMemoryBufferDesc));
+		RunSineWaveKernel(vertBufWidth, vertBufHeight, (Vertex *)m_cudaDevVertptr, m_streamToRun, 1.0f);
+		checkCudaErrors(cudaStreamSynchronize(m_streamToRun));
+
+	}
+
+	// Create synchronization objects and wait until assets have been uploaded to the GPU.
+	{
+		ThrowIfFailed(m_device->CreateFence(m_fenceValues[m_frameIndex], D3D12_FENCE_FLAG_SHARED, IID_PPV_ARGS(&m_fence)));
+
+		cudaExternalSemaphoreHandleDesc externalSemaphoreHandleDesc;
+
+		memset(&externalSemaphoreHandleDesc, 0, sizeof(externalSemaphoreHandleDesc));
+		WindowsSecurityAttributes windowsSecurityAttributes;
+		LPCWSTR name = NULL;
+		HANDLE sharedHandle;
+		externalSemaphoreHandleDesc.type = cudaExternalSemaphoreHandleTypeD3D12Fence;
+		m_device->CreateSharedHandle(m_fence.Get(), &windowsSecurityAttributes, GENERIC_ALL, name, &sharedHandle);
+		externalSemaphoreHandleDesc.handle.win32.handle = (void *)sharedHandle;
+		externalSemaphoreHandleDesc.flags = 0;
+
+		checkCudaErrors(cudaImportExternalSemaphore(&m_externalSemaphore, &externalSemaphoreHandleDesc));
+
+		m_fenceValues[m_frameIndex]++;
+
+		// Create an event handle to use for frame synchronization.
+		m_fenceEvent = CreateEvent(nullptr, FALSE, FALSE, nullptr);
+		if (m_fenceEvent == nullptr)
+		{
+			ThrowIfFailed(HRESULT_FROM_WIN32(GetLastError()));
+		}
+
+		// Wait for the command list to execute; we are reusing the same command 
+		// list in our main loop but for now, we just want to wait for setup to 
+		// complete before continuing.
+		WaitForGpu();
+	}
+}
+
+// Render the scene.
+void DX12CudaInterop::OnRender()
+{
+	// Record all the commands we need to render the scene into the command list.
+	PopulateCommandList();
+
+	// Execute the command list.
+	ID3D12CommandList* ppCommandLists[] = { m_commandList.Get() };
+	m_commandQueue->ExecuteCommandLists(_countof(ppCommandLists), ppCommandLists);
+
+	// Present the frame.
+	ThrowIfFailed(m_swapChain->Present(1, 0));
+
+	// Schedule a Signal command in the queue.
+	const UINT64 currentFenceValue = m_fenceValues[m_frameIndex];
+	ThrowIfFailed(m_commandQueue->Signal(m_fence.Get(), currentFenceValue));
+
+	MoveToNextFrame();
+}
+
+void DX12CudaInterop::OnDestroy()
+{
+	// Ensure that the GPU is no longer referencing resources that are about to be
+	// cleaned up by the destructor.
+	WaitForGpu();
+	checkCudaErrors(cudaDestroyExternalSemaphore(m_externalSemaphore));
+	checkCudaErrors(cudaDestroyExternalMemory(m_externalMemory));
+	CloseHandle(m_fenceEvent);
+}
+
+void DX12CudaInterop::PopulateCommandList()
+{
+	// Command list allocators can only be reset when the associated 
+	// command lists have finished execution on the GPU; apps should use 
+	// fences to determine GPU execution progress.
+	ThrowIfFailed(m_commandAllocators[m_frameIndex]->Reset());
+
+	// However, when ExecuteCommandList() is called on a particular command 
+	// list, that command list can then be reset at any time and must be before 
+	// re-recording.
+	ThrowIfFailed(m_commandList->Reset(m_commandAllocators[m_frameIndex].Get(), m_pipelineState.Get()));
+
+	m_commandList->SetGraphicsRootSignature(m_rootSignature.Get());
+
+	// Set necessary state.
+	m_commandList->RSSetViewports(1, &m_viewport);
+	m_commandList->RSSetScissorRects(1, &m_scissorRect);
+
+	// Indicate that the back buffer will be used as a render target.
+	m_commandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_renderTargets[m_frameIndex].Get(), D3D12_RESOURCE_STATE_PRESENT, D3D12_RESOURCE_STATE_RENDER_TARGET));
+
+	CD3DX12_CPU_DESCRIPTOR_HANDLE rtvHandle(m_rtvHeap->GetCPUDescriptorHandleForHeapStart(), m_frameIndex, m_rtvDescriptorSize);
+	m_commandList->OMSetRenderTargets(1, &rtvHandle, FALSE, nullptr);
+
+	// Record commands.
+	const float clearColor[] = { 0.0f, 0.2f, 0.4f, 1.0f };
+	m_commandList->ClearRenderTargetView(rtvHandle, clearColor, 0, nullptr);
+	m_commandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_POINTLIST);
+	m_commandList->IASetVertexBuffers(0, 1, &m_vertexBufferView);
+	m_commandList->DrawInstanced(vertBufHeight*vertBufWidth, 1, 0, 0);
+
+	// Indicate that the back buffer will now be used to present.
+	m_commandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_renderTargets[m_frameIndex].Get(), D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_PRESENT));
+
+	ThrowIfFailed(m_commandList->Close());
+}
+
+// Wait for pending GPU work to complete.
+void DX12CudaInterop::WaitForGpu()
+{
+	// Schedule a Signal command in the queue.
+	ThrowIfFailed(m_commandQueue->Signal(m_fence.Get(), m_fenceValues[m_frameIndex]));
+
+	// Wait until the fence has been processed.
+	ThrowIfFailed(m_fence->SetEventOnCompletion(m_fenceValues[m_frameIndex], m_fenceEvent));
+	WaitForSingleObjectEx(m_fenceEvent, INFINITE, FALSE);
+
+	// Increment the fence value for the current frame.
+	m_fenceValues[m_frameIndex]++;
+}
+
+// Prepare to render the next frame.
+void DX12CudaInterop::MoveToNextFrame()
+{
+	const UINT64 currentFenceValue = m_fenceValues[m_frameIndex];
+	cudaExternalSemaphoreWaitParams externalSemaphoreWaitParams;
+	memset(&externalSemaphoreWaitParams, 0, sizeof(externalSemaphoreWaitParams));
+
+	externalSemaphoreWaitParams.params.fence.value = currentFenceValue;
+	externalSemaphoreWaitParams.flags = 0;
+
+	checkCudaErrors(cudaWaitExternalSemaphoresAsync(&m_externalSemaphore, &externalSemaphoreWaitParams, 1, m_streamToRun));
+
+	m_AnimTime += 0.01f;
+	RunSineWaveKernel(vertBufWidth, vertBufHeight, (Vertex *)m_cudaDevVertptr, m_streamToRun, m_AnimTime);
+
+	cudaExternalSemaphoreSignalParams externalSemaphoreSignalParams;
+	memset(&externalSemaphoreSignalParams, 0, sizeof(externalSemaphoreSignalParams));
+	m_fenceValues[m_frameIndex] = currentFenceValue + 1;
+	externalSemaphoreSignalParams.params.fence.value = m_fenceValues[m_frameIndex];
+	externalSemaphoreSignalParams.flags = 0;
+
+	checkCudaErrors(cudaSignalExternalSemaphoresAsync(&m_externalSemaphore, &externalSemaphoreSignalParams, 1, m_streamToRun));
+
+	// Update the frame index.
+	m_frameIndex = m_swapChain->GetCurrentBackBufferIndex();
+
+	// If the next frame is not ready to be rendered yet, wait until it is ready.
+	if (m_fence->GetCompletedValue() < m_fenceValues[m_frameIndex])
+	{
+		ThrowIfFailed(m_fence->SetEventOnCompletion(m_fenceValues[m_frameIndex], m_fenceEvent));
+		WaitForSingleObjectEx(m_fenceEvent, INFINITE, FALSE);
+	}
+
+	// Set the fence value for the next frame.
+	m_fenceValues[m_frameIndex] = currentFenceValue + 2;
+}
--- a/Samples/simpleD3D12/simpleD3D12.h
+++ b/Samples/simpleD3D12/simpleD3D12.h
@ -0,0 +1,127 @@
+/*
+* Copyright 1993-2018 NVIDIA Corporation.  All rights reserved.
+*
+* Please refer to the NVIDIA end user license agreement (EULA) associated
+* with this source code for terms and conditions that govern your use of
+* this software. Any use, reproduction, disclosure, or distribution of
+* this software and related documentation outside the terms of the EULA
+* is strictly prohibited.
+*
+*/
+
+#pragma once
+
+#include "DX12CudaSample.h"
+#include "ShaderStructs.h"
+
+using namespace DirectX;
+
+// Note that while ComPtr is used to manage the lifetime of resources on the CPU,
+// it has no understanding of the lifetime of resources on the GPU. Apps must account
+// for the GPU lifetime of resources to avoid destroying objects that may still be
+// referenced by the GPU.
+// An example of this can be found in the class method: OnDestroy().
+using Microsoft::WRL::ComPtr;
+
+static const char *shaderstr = 
+" struct PSInput \n" \
+" { \n" \
+"  float4 position : SV_POSITION; \n" \
+"  float4 color : COLOR; \n" \
+" } \n" \
+" PSInput VSMain(float3 position : POSITION, float4 color : COLOR) \n" \
+" { \n" \
+"  PSInput result;\n" \
+"  result.position = float4(position, 1.0f);\n" \
+"  result.color = color;\n"	\
+"  return result; \n" \
+" } \n" \
+" float4 PSMain(PSInput input) : SV_TARGET \n" \
+" { \n" \
+"  return input.color;\n" \
+" } \n";
+
+class DX12CudaInterop : public DX12CudaSample
+{
+public:
+	DX12CudaInterop(UINT width, UINT height, std::string name);
+
+	virtual void OnInit();
+	virtual void OnRender();
+	virtual void OnDestroy();
+
+private:
+	// In this sample we overload the meaning of FrameCount to mean both the maximum
+	// number of frames that will be queued to the GPU at a time, as well as the number
+	// of back buffers in the DXGI swap chain. For the majority of applications, this
+	// is convenient and works well. However, there will be certain cases where an
+	// application may want to queue up more frames than there are back buffers
+	// available.
+	// It should be noted that excessive buffering of frames dependent on user input
+	// may result in noticeable latency in your app.
+	static const UINT FrameCount = 2;
+	std::string shadersSrc = shaderstr;
+#if 0
+		" struct PSInput \n" \
+		" { \n" \
+		"  float4 position : SV_POSITION; \n" \
+		"  float4 color : COLOR; \n" \
+		" } \n" \
+		" PSInput VSMain(float3 position : POSITION, float4 color : COLOR) \n" \
+		" { \n" \
+		"  PSInput result;\n" \
+		"  result.position = float4(position, 1.0f);\n" \
+		"  result.color = color;\n"	\
+		"  return result; \n" \
+		" } \n" \
+		" float4 PSMain(PSInput input) : SV_TARGET \n" \
+		" { \n" \
+		"  return input.color;\n" \
+		" } \n";
+#endif
+
+	// Vertex Buffer dimension
+	size_t vertBufHeight, vertBufWidth;
+
+	// Pipeline objects.
+	D3D12_VIEWPORT m_viewport;
+	CD3DX12_RECT m_scissorRect;
+	ComPtr<IDXGISwapChain3> m_swapChain;
+	ComPtr<ID3D12Device> m_device;
+	ComPtr<ID3D12Resource> m_renderTargets[FrameCount];
+	ComPtr<ID3D12CommandAllocator> m_commandAllocators[FrameCount];
+	ComPtr<ID3D12CommandQueue>   m_commandQueue;
+	ComPtr<ID3D12RootSignature>  m_rootSignature;
+	ComPtr<ID3D12DescriptorHeap> m_rtvHeap;
+	ComPtr<ID3D12PipelineState>  m_pipelineState;
+	ComPtr<ID3D12GraphicsCommandList> m_commandList;
+	UINT m_rtvDescriptorSize;
+
+	// App resources.
+	ComPtr<ID3D12Resource> m_vertexBuffer;
+	D3D12_VERTEX_BUFFER_VIEW m_vertexBufferView;
+
+	// Synchronization objects.
+	UINT m_frameIndex;
+	HANDLE m_fenceEvent;
+	ComPtr<ID3D12Fence> m_fence;
+	UINT64 m_fenceValues[FrameCount];
+
+	// CUDA objects
+	cudaExternalMemoryHandleType m_externalMemoryHandleType;
+	cudaExternalMemory_t	     m_externalMemory;
+	cudaExternalSemaphore_t      m_externalSemaphore;
+	cudaStream_t				 m_streamToRun;
+	LUID						 m_dx12deviceluid;
+	UINT						 m_cudaDeviceID;
+	UINT						 m_nodeMask;
+	float						 m_AnimTime;
+	void *m_cudaDevVertptr = NULL;
+
+	void LoadPipeline();
+	void InitCuda();
+	void LoadAssets();
+	void PopulateCommandList();
+	void MoveToNextFrame();
+	void WaitForGpu();
+};
--- a/Samples/simpleD3D12/simpleD3D12_vs2015.sln
+++ b/Samples/simpleD3D12/simpleD3D12_vs2015.sln
@ -0,0 +1,20 @@
+
+Microsoft Visual Studio Solution File, Format Version 14.00
+# Visual Studio 2015
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "simpleD3D12", "simpleD3D12_vs2015.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
+EndProject
+Global
+	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		Debug|x64 = Debug|x64
+		Release|x64 = Release|x64
+	EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
+		{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
+		{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
+		{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
+	EndGlobalSection
+	GlobalSection(SolutionProperties) = preSolution
+		HideSolutionNode = FALSE
+	EndGlobalSection
+EndGlobal
--- a/Samples/simpleD3D12/simpleD3D12_vs2015.vcxproj
+++ b/Samples/simpleD3D12/simpleD3D12_vs2015.vcxproj
@ -0,0 +1,134 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <PropertyGroup>
+    <CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
+  </PropertyGroup>
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
+    <RootNamespace>simpleD3D12_vs2015</RootNamespace>
+    <ProjectName>simpleD3D12</ProjectName>
+    <CudaToolkitCustomDir />
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(WindowsTargetPlatformVersion)'==''">
+    <LatestTargetPlatformVersion>$([Microsoft.Build.Utilities.ToolLocationHelper]::GetLatestSDKTargetPlatformVersion('Windows', '10.0'))</LatestTargetPlatformVersion>
+    <WindowsTargetPlatformVersion Condition="'$(WindowsTargetPlatformVersion)' == ''">$(LatestTargetPlatformVersion)</WindowsTargetPlatformVersion>
+    <TargetPlatformVersion>$(WindowsTargetPlatformVersion)</TargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup>
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>MultiByte</CharacterSet>
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)'=='Debug'">
+    <UseDebugLibraries>true</UseDebugLibraries>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)'=='Release'">
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+    <Import Project="$(CUDAPropsPath)\CUDA 10.0.props" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets">
+    <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <IntDir>$(Platform)/$(Configuration)/</IntDir>
+    <IncludePath>$(IncludePath)</IncludePath>
+    <CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules />
+    <CodeAnalysisRuleAssemblies />
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Platform)'=='x64'">
+    <OutDir>../../bin/win64/$(Configuration)/</OutDir>
+  </PropertyGroup>
+  <ItemDefinitionGroup>
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../Common;$(DXSDK_DIR)/include;</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <AdditionalDependencies>cudart_static.lib;d3d12.lib;d3dcompiler.lib;dxgi.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
+      <OutputFile>$(OutDir)/simpleD3D12.exe</OutputFile>
+    </Link>
+    <CudaCompile>
+      <CodeGeneration>compute_30,sm_30;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;</CodeGeneration>
+      <AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
+      <Include>./;../../Common</Include>
+      <Defines>WIN32</Defines>
+    </CudaCompile>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
+    </Link>
+    <CudaCompile>
+      <Runtime>MTd</Runtime>
+      <TargetMachinePlatform>64</TargetMachinePlatform>
+    </CudaCompile>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>false</GenerateDebugInformation>
+      <LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
+    </Link>
+    <CudaCompile>
+      <Runtime>MT</Runtime>
+      <TargetMachinePlatform>64</TargetMachinePlatform>
+    </CudaCompile>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Platform)'=='Win32'">
+    <Link>
+      <AdditionalLibraryDirectories>$(DXSDK_DIR)/Lib/x86;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Platform)'=='x64'">
+    <Link>
+      <AdditionalLibraryDirectories>$(DXSDK_DIR)/Lib/$(PlatformName);%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="DX12CudaSample.cpp" />
+    <ClCompile Include="Main.cpp" />
+    <ClCompile Include="Win32Application.cpp" />
+    <ClCompile Include="simpleD3D12.cpp" />
+    <CudaCompile Include="sinewave_cuda.cu" />
+    <ClCompile Include="stdafx.cpp" />
+    <ClInclude Include="DX12CudaSample.h" />
+    <ClInclude Include="DXSampleHelper.h" />
+    <ClInclude Include="ShaderStructs.h" />
+    <ClInclude Include="Win32Application.h" />
+    <ClInclude Include="d3dx12.h" />
+    <ClInclude Include="shaders.hlsl" />
+    <ClInclude Include="simpleD3D12.h" />
+    <ClInclude Include="stdafx.h" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+    <Import Project="$(CUDAPropsPath)\CUDA 10.0.targets" />
+  </ImportGroup>
+</Project>
--- a/Samples/simpleD3D12/simpleD3D12_vs2017.sln
+++ b/Samples/simpleD3D12/simpleD3D12_vs2017.sln
@ -0,0 +1,20 @@
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio 2017
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "simpleD3D12", "simpleD3D12_vs2017.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
+EndProject
+Global
+	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		Debug|x64 = Debug|x64
+		Release|x64 = Release|x64
+	EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
+		{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
+		{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
+		{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
+	EndGlobalSection
+	GlobalSection(SolutionProperties) = preSolution
+		HideSolutionNode = FALSE
+	EndGlobalSection
+EndGlobal
--- a/Samples/simpleD3D12/simpleD3D12_vs2017.vcxproj
+++ b/Samples/simpleD3D12/simpleD3D12_vs2017.vcxproj
@ -0,0 +1,135 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <PropertyGroup>
+    <CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
+  </PropertyGroup>
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
+    <RootNamespace>simpleD3D12_vs2017</RootNamespace>
+    <ProjectName>simpleD3D12</ProjectName>
+    <CudaToolkitCustomDir />
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(WindowsTargetPlatformVersion)'==''">
+    <LatestTargetPlatformVersion>$([Microsoft.Build.Utilities.ToolLocationHelper]::GetLatestSDKTargetPlatformVersion('Windows', '10.0'))</LatestTargetPlatformVersion>
+    <WindowsTargetPlatformVersion Condition="'$(WindowsTargetPlatformVersion)' == ''">$(LatestTargetPlatformVersion)</WindowsTargetPlatformVersion>
+    <TargetPlatformVersion>$(WindowsTargetPlatformVersion)</TargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup>
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>MultiByte</CharacterSet>
+    <PlatformToolset>v141</PlatformToolset>
+	<WindowsTargetPlatformVersion>10.0.15063.0</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)'=='Debug'">
+    <UseDebugLibraries>true</UseDebugLibraries>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)'=='Release'">
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+    <Import Project="$(CUDAPropsPath)\CUDA 10.0.props" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets">
+    <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <IntDir>$(Platform)/$(Configuration)/</IntDir>
+    <IncludePath>$(IncludePath)</IncludePath>
+    <CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules />
+    <CodeAnalysisRuleAssemblies />
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Platform)'=='x64'">
+    <OutDir>../../bin/win64/$(Configuration)/</OutDir>
+  </PropertyGroup>
+  <ItemDefinitionGroup>
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../Common;$(DXSDK_DIR)/include;</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <AdditionalDependencies>cudart_static.lib;d3d12.lib;d3dcompiler.lib;dxgi.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
+      <OutputFile>$(OutDir)/simpleD3D12.exe</OutputFile>
+    </Link>
+    <CudaCompile>
+      <CodeGeneration>compute_30,sm_30;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;</CodeGeneration>
+      <AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
+      <Include>./;../../Common</Include>
+      <Defines>WIN32</Defines>
+    </CudaCompile>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
+    </Link>
+    <CudaCompile>
+      <Runtime>MTd</Runtime>
+      <TargetMachinePlatform>64</TargetMachinePlatform>
+    </CudaCompile>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>false</GenerateDebugInformation>
+      <LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
+    </Link>
+    <CudaCompile>
+      <Runtime>MT</Runtime>
+      <TargetMachinePlatform>64</TargetMachinePlatform>
+    </CudaCompile>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Platform)'=='Win32'">
+    <Link>
+      <AdditionalLibraryDirectories>$(DXSDK_DIR)/Lib/x86;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Platform)'=='x64'">
+    <Link>
+      <AdditionalLibraryDirectories>$(DXSDK_DIR)/Lib/$(PlatformName);%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="DX12CudaSample.cpp" />
+    <ClCompile Include="Main.cpp" />
+    <ClCompile Include="Win32Application.cpp" />
+    <ClCompile Include="simpleD3D12.cpp" />
+    <CudaCompile Include="sinewave_cuda.cu" />
+    <ClCompile Include="stdafx.cpp" />
+    <ClInclude Include="DX12CudaSample.h" />
+    <ClInclude Include="DXSampleHelper.h" />
+    <ClInclude Include="ShaderStructs.h" />
+    <ClInclude Include="Win32Application.h" />
+    <ClInclude Include="d3dx12.h" />
+    <ClInclude Include="shaders.hlsl" />
+    <ClInclude Include="simpleD3D12.h" />
+    <ClInclude Include="stdafx.h" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+    <Import Project="$(CUDAPropsPath)\CUDA 10.0.targets" />
+  </ImportGroup>
+</Project>
--- a/Samples/simpleD3D12/sinewave_cuda.cu
+++ b/Samples/simpleD3D12/sinewave_cuda.cu
@ -0,0 +1,55 @@
+/*
+ * Copyright 1993-2018 NVIDIA Corporation.  All rights reserved.
+ *
+ * Please refer to the NVIDIA end user license agreement (EULA) associated
+ * with this source code for terms and conditions that govern your use of
+ * this software. Any use, reproduction, disclosure, or distribution of
+ * this software and related documentation outside the terms of the EULA
+ * is strictly prohibited.
+ *
+ */
+
+#include <stdio.h>
+#include "ShaderStructs.h"
+#include "helper_cuda.h"
+
+__global__ void sinewave_gen_kernel(Vertex *vertices, unsigned int width, unsigned int height, float time)
+{
+    unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
+    unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+    // calculate uv coordinates
+    float u = x / (float) width;
+    float v = y / (float) height;
+    u = u*2.0f - 1.0f;
+    v = v*2.0f - 1.0f;
+
+    // calculate simple sine wave pattern
+    float freq = 4.0f;
+    float w = sinf(u*freq + time) * cosf(v*freq + time) * 0.5f;
+
+    if (y < height && x < width)
+    {
+        // write output vertex
+        vertices[y*width+x].position.x = u;
+        vertices[y*width+x].position.y = w;
+        vertices[y*width+x].position.z = v;
+        //vertices[y*width+x].position[3] = 1.0f;
+        vertices[y*width+x].color.x = 1.0f;
+        vertices[y*width+x].color.y = 0.0f;
+        vertices[y*width+x].color.z = 0.0f;
+		vertices[y*width + x].color.w = 0.0f;
+    }
+}
+
+// The host CPU Sinewave thread spawner
+void RunSineWaveKernel(size_t mesh_width, size_t mesh_height, Vertex *cudaDevVertptr, cudaStream_t streamToRun, float AnimTime)
+{
+	dim3 block(16, 16, 1);
+	dim3 grid(mesh_width / 16, mesh_height / 16, 1);
+	Vertex *vertices = (Vertex*)cudaDevVertptr;
+	sinewave_gen_kernel<<< grid, block, 0, streamToRun >>>(vertices, mesh_width, mesh_height, AnimTime);
+
+	getLastCudaError("sinewave_gen_kernel execution failed.\n");
+}
+
--- a/Samples/simpleD3D12/stdafx.cpp
+++ b/Samples/simpleD3D12/stdafx.cpp
@ -0,0 +1,12 @@
+/*
+* Copyright 1993-2018 NVIDIA Corporation.  All rights reserved.
+*
+* Please refer to the NVIDIA end user license agreement (EULA) associated
+* with this source code for terms and conditions that govern your use of
+* this software. Any use, reproduction, disclosure, or distribution of
+* this software and related documentation outside the terms of the EULA
+* is strictly prohibited.
+*
+*/
+
+#include "stdafx.h"
--- a/Samples/simpleD3D12/stdafx.h
+++ b/Samples/simpleD3D12/stdafx.h
@ -0,0 +1,33 @@
+/*
+* Copyright 1993-2018 NVIDIA Corporation.  All rights reserved.
+*
+* Please refer to the NVIDIA end user license agreement (EULA) associated
+* with this source code for terms and conditions that govern your use of
+* this software. Any use, reproduction, disclosure, or distribution of
+* this software and related documentation outside the terms of the EULA
+* is strictly prohibited.
+*
+*/
+
+
+// stdafx.h : include file for standard system include files,
+// or project specific include files that are used frequently, but
+// are changed infrequently.
+
+#pragma once
+
+#ifndef WIN32_LEAN_AND_MEAN
+#define WIN32_LEAN_AND_MEAN             // Exclude rarely-used stuff from Windows headers.
+#endif
+
+#include <windows.h>
+
+#include <d3d12.h>
+#include <dxgi1_4.h>
+#include <D3Dcompiler.h>
+#include <DirectXMath.h>
+#include "d3dx12.h"
+
+#include <string>
+#include <wrl.h>
+#include <shellapi.h>