cuda-samples/Samples/SLID3D10Texture/SLID3D10Texture.cpp

845 lines
27 KiB
C++
Raw Normal View History

2021-10-21 19:04:49 +08:00
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
//
// This example demonstrates interoperability of SLI with a Direct3D10 texture
// and CUDA. The program creates a D3D10 texture which is written from
// a CUDA kernel. Direct3D then renders the result on the screen.
// A Direct3D Capable device is required.
//
#pragma warning(disable : 4312)
#include <windows.h>
#include <mmsystem.h>
#pragma warning(disable : 4996) // disable deprecated warning
#include <strsafe.h>
#pragma warning(default : 4996)
// this header includes all the necessary D3D10 includes
#include <dynlink_d3d10.h>
// includes, cuda
#include <cuda.h>
#include <builtin_types.h>
#include <cuda_runtime.h>
#include <cuda_d3d10_interop.h>
#include <d3d10.h>
// includes, project
#include <rendercheck_d3d10.h> // automated testing
#include <helper_cuda.h> // helper for CUDA error checking and initialization
#define MAX_EPSILON 10
static char *SDK_name = "SLID3D10Texture";
//-----------------------------------------------------------------------------
// Global variables
//-----------------------------------------------------------------------------
IDXGIAdapter *g_pCudaCapableAdapter = NULL; // Adapter to use
ID3D10Device *g_pd3dDevice = NULL; // Our rendering device
IDXGISwapChain *g_pSwapChain = NULL; // The swap chain of the window
ID3D10RenderTargetView *g_pSwapChainRTV =
NULL; // The Render target view on the swap chain ( used for clear)
ID3D10RasterizerState *g_pRasterState = NULL;
ID3D10InputLayout *g_pInputLayout = NULL;
ID3D10Effect *g_pSimpleEffect = NULL;
ID3D10EffectTechnique *g_pSimpleTechnique = NULL;
ID3D10EffectVectorVariable *g_pvQuadRect = NULL;
ID3D10EffectShaderResourceVariable *g_pTexture2D = NULL;
static const char g_simpleEffectSrc[] =
"float4 g_vQuadRect; \n"
"Texture2D g_Texture2D; \n"
"\n"
"SamplerState samLinear{ \n"
" Filter = MIN_MAG_LINEAR_MIP_POINT; \n"
"};\n"
"\n"
"struct Fragment{ \n"
" float4 Pos : SV_POSITION;\n"
" float3 Tex : TEXCOORD0; };\n"
"\n"
"Fragment VS( uint vertexId : SV_VertexID )\n"
"{\n"
" Fragment f;\n"
" f.Tex = float3( 0.f, 0.f, 0.f); \n"
" if (vertexId == 1) f.Tex.x = 1.f; \n"
" else if (vertexId == 2) f.Tex.y = 1.f; \n"
" else if (vertexId == 3) f.Tex.xy = float2(1.f, 1.f); \n"
" \n"
" f.Pos = float4( g_vQuadRect.xy + f.Tex * g_vQuadRect.zw, 0, 1);\n"
" \n"
" return f;\n"
"}\n"
"\n"
"float4 PS( Fragment f ) : SV_Target\n"
"{\n"
" // return g_Texture2D.Sample( samLinear, f.Tex.xy ); \n"
" // return float4(f.Tex, 1);\n"
" float4 g = g_Texture2D.Sample( samLinear, f.Tex.xy );"
" for (int i = 0; i < 1024; ++i) { "
" g.x = sqrt(g.x);"
" g.x += 0.0001;"
" g.x = g.x * g.x;"
" }"
" return g;\n"
"}\n"
"\n"
"technique10 Render\n"
"{\n"
" pass P0\n"
" {\n"
" SetVertexShader( CompileShader( vs_4_0, VS() ) );\n"
" SetGeometryShader( NULL );\n"
" SetPixelShader( CompileShader( ps_4_0, PS() ) );\n"
" }\n"
"}\n"
"\n";
// testing/tracing function used pervasively in tests. If the condition is
// unsatisfied
// then spew and fail the function immediately (doing no cleanup)
#define AssertOrQuit(x) \
if (!(x)) { \
fprintf(stdout, "Assert unsatisfied in %s at %s:%d\n", __FUNCTION__, \
__FILE__, __LINE__); \
return 1; \
}
bool g_bDone = false;
bool g_bPassed = true;
int *pArgc = NULL;
char **pArgv = NULL;
const unsigned int g_WindowWidth = 720;
const unsigned int g_WindowHeight = 720;
bool g_bQAReadback = false;
int g_iFrameToCompare = 1;
struct CudaContextData {
UINT index;
CUcontext context;
int deviceOrdinal;
cudaGraphicsResource *cudaResource;
void *cudaLinearMemory;
};
UINT g_ContextCount = 0;
CudaContextData g_ContextData[32];
// Data structure for 2D texture shared between DX10 and CUDA
struct {
ID3D10Texture2D *pTexture;
ID3D10ShaderResourceView *pSRView;
size_t pitch;
int width;
int height;
} g_texture_2d;
// The CUDA kernel launchers that get called
extern "C" {
bool cuda_texture_2d(void *surface, size_t width, size_t height, size_t pitch,
float t);
}
//-----------------------------------------------------------------------------
// Forward declarations
//-----------------------------------------------------------------------------
HRESULT InitD3D(HWND hWnd);
HRESULT InitTextures();
int RunKernels(CudaContextData *currentContextData);
void DrawScene();
int Cleanup();
int Render();
LRESULT WINAPI MsgProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam);
#define NAME_LEN 512
bool findCUDADevice() {
int nGraphicsGPU = 0;
int deviceCount = 0;
bool bFoundGraphics = false;
char devname[NAME_LEN];
// This function call returns 0 if there are no CUDA capable devices.
cudaError_t error_id = cudaGetDeviceCount(&deviceCount);
if (error_id != cudaSuccess) {
printf("cudaGetDeviceCount returned %d\n-> %s\n", (int)error_id,
cudaGetErrorString(error_id));
exit(EXIT_FAILURE);
}
if (deviceCount == 0) {
printf("> There are no device(s) supporting CUDA\n");
return false;
} else {
printf("> Found %d CUDA Capable Device(s)\n", deviceCount);
}
// Get CUDA device properties
cudaDeviceProp deviceProp;
for (int dev = 0; dev < deviceCount; ++dev) {
cudaGetDeviceProperties(&deviceProp, dev);
STRCPY(devname, NAME_LEN, deviceProp.name);
printf("> GPU %d: %s\n", dev, devname);
}
return true;
}
bool findDXDevice(char *dev_name) {
HRESULT hr = S_OK;
cudaError cuStatus;
// Iterate through the candidate adapters
IDXGIFactory *pFactory;
hr = sFnPtr_CreateDXGIFactory(__uuidof(IDXGIFactory), (void **)(&pFactory));
if (!SUCCEEDED(hr)) {
printf("> No DXGI Factory created.\n");
return false;
}
UINT adapter = 0;
for (; !g_pCudaCapableAdapter; ++adapter) {
// Get a candidate DXGI adapter
IDXGIAdapter *pAdapter = NULL;
hr = pFactory->EnumAdapters(adapter, &pAdapter);
if (FAILED(hr)) {
break; // no compatible adapters found
}
// Query to see if there exists a corresponding compute device
int cuDevice;
cuStatus = cudaD3D10GetDevice(&cuDevice, pAdapter);
printLastCudaError("cudaD3D10GetDevice failed"); // This prints and resets
// the cudaError to
// cudaSuccess
if (cudaSuccess == cuStatus) {
// If so, mark it as the one against which to create our d3d10 device
g_pCudaCapableAdapter = pAdapter;
g_pCudaCapableAdapter->AddRef();
}
pAdapter->Release();
}
printf("> Found %d D3D10 Adapater(s).\n", (int)adapter);
pFactory->Release();
if (!g_pCudaCapableAdapter) {
printf("> Found 0 D3D10 Adapater(s) /w Compute capability.\n");
return false;
}
DXGI_ADAPTER_DESC adapterDesc;
g_pCudaCapableAdapter->GetDesc(&adapterDesc);
wcstombs_s(NULL, dev_name, 256, adapterDesc.Description, 128);
printf("> Found 1 D3D10 Adapater(s) /w Compute capability.\n");
printf("> %s\n", dev_name);
return true;
}
////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
int main(int argc, char *argv[]) {
char device_name[256];
char *ref_file = NULL;
pArgc = &argc;
pArgv = argv;
printf("%s Starting...\n\n", SDK_name);
if (!findCUDADevice()) // Search for CUDA GPU
{
printf("> CUDA Device NOT found on \"%s\".. Exiting.\n", device_name);
exit(EXIT_SUCCESS);
}
if (!dynlinkLoadD3D10API()) // Search for D3D API (locate drivers, does not
// mean device is found)
{
printf("> D3D10 API libraries NOT found on.. Exiting.\n");
dynlinkUnloadD3D10API();
exit(EXIT_SUCCESS);
}
if (!findDXDevice(device_name)) // Search for D3D Hardware Device
{
printf("> D3D10 Graphics Device NOT found.. Exiting.\n");
dynlinkUnloadD3D10API();
exit(EXIT_SUCCESS);
}
// command line options
if (checkCmdLineFlag(argc, (const char **)argv, "file")) {
g_bQAReadback = true;
getCmdLineArgumentString(argc, (const char **)argv, "file", &ref_file);
}
//
// create window
//
// Register the window class
WNDCLASSEX wc = {sizeof(WNDCLASSEX),
CS_CLASSDC,
MsgProc,
0L,
0L,
GetModuleHandle(NULL),
NULL,
NULL,
NULL,
NULL,
"CUDA SDK",
NULL};
RegisterClassEx(&wc);
// Create the application's window
HWND hWnd = CreateWindow(wc.lpszClassName, "CUDA-SLI Interop, D3D10",
WS_OVERLAPPEDWINDOW, 0, 0, g_WindowWidth,
g_WindowHeight, NULL, NULL, wc.hInstance, NULL);
ShowWindow(hWnd, SW_SHOWDEFAULT);
UpdateWindow(hWnd);
// Initialize Direct3D
if (SUCCEEDED(InitD3D(hWnd)) && SUCCEEDED(InitTextures())) {
CUresult result = CUDA_SUCCESS;
cudaError_t error = cudaSuccess;
// get the list of interop devices
{
unsigned int interopDeviceCount = 0;
int interopDevices[32];
error = cudaD3D10GetDevices(&interopDeviceCount, interopDevices, 32,
g_pd3dDevice, cudaD3D10DeviceListAll);
printLastCudaError("cudaD3D10GetDevices failed"); // This prints and
// resets the cudaError
// to cudaSuccess
AssertOrQuit(cudaSuccess == error);
g_ContextCount = interopDeviceCount;
for (UINT i = 0; i < interopDeviceCount; ++i) {
g_ContextData[i].index = i;
g_ContextData[i].deviceOrdinal = interopDevices[i];
}
}
// Initialize g_ContextCount interop contexts on the device,
// striping across AFR groups
for (UINT i = 0; i < g_ContextCount; ++i) {
printf("Creating context %d on device %d\n", g_ContextData[i].index,
g_ContextData[i].deviceOrdinal);
// create a context
error = cudaD3D10SetDirect3DDevice(g_pd3dDevice,
g_ContextData[i].deviceOrdinal);
AssertOrQuit(cudaSuccess == error);
error = cudaFree(0);
AssertOrQuit(cudaSuccess == error);
// allocate a buffer
// error = cudaMalloc((void**)&g_ContextData[i].buffer, BYTES_PER_PIXEL);
cudaMallocPitch(&g_ContextData[i].cudaLinearMemory, &g_texture_2d.pitch,
g_texture_2d.width * sizeof(float) * 4,
g_texture_2d.height);
getLastCudaError("cudaMallocPitch (g_texture_2d) failed");
cudaMemset(g_ContextData[i].cudaLinearMemory, 1,
g_texture_2d.pitch * g_texture_2d.height);
AssertOrQuit(cudaSuccess == error);
// pop the context
result = cuCtxPopCurrent(&g_ContextData[i].context);
AssertOrQuit(CUDA_SUCCESS == result);
}
// Register the texture with all contexts
for (UINT i = 0; i < g_ContextCount; ++i) {
printf("Registering texture with context %d\n", i);
result = cuCtxPushCurrent(g_ContextData[i].context);
AssertOrQuit(CUDA_SUCCESS == result);
{
// Register the resource
error = cudaGraphicsD3D10RegisterResource(
&g_ContextData[i].cudaResource, g_texture_2d.pTexture,
cudaGraphicsRegisterFlagsNone);
getLastCudaError(
"cudaGraphicsD3D10RegisterResource (g_texture_2d) failed");
error = cudaGraphicsResourceSetMapFlags(g_ContextData[i].cudaResource,
cudaD3D10MapFlagsWriteDiscard);
getLastCudaError(
"cudaGraphicsResourceSetMapFlags (g_texture_2d) failed");
AssertOrQuit(cudaSuccess == error);
}
result = cuCtxPopCurrent(&g_ContextData[i].context);
AssertOrQuit(CUDA_SUCCESS == result);
}
}
//
// the main loop
//
while (false == g_bDone) {
Render();
//
// handle I/O
//
MSG msg;
ZeroMemory(&msg, sizeof(msg));
while (msg.message != WM_QUIT) {
if (PeekMessage(&msg, NULL, 0U, 0U, PM_REMOVE)) {
TranslateMessage(&msg);
DispatchMessage(&msg);
} else {
Render();
if (ref_file) {
for (int count = 0; count < g_iFrameToCompare; count++) {
Render();
}
const char *cur_image_path = "SLID3D10Texture.ppm";
// Save a reference of our current test run image
CheckRenderD3D10::ActiveRenderTargetToPPM(g_pd3dDevice,
cur_image_path);
// compare to offical reference image, printing PASS or FAIL.
g_bPassed = CheckRenderD3D10::PPMvsPPM(cur_image_path, ref_file,
argv[0], MAX_EPSILON, 0.15f);
g_bDone = true;
Cleanup();
PostQuitMessage(0);
} else {
g_bPassed = true;
}
}
}
};
// Unregister windows class
UnregisterClass(wc.lpszClassName, wc.hInstance);
//
// and exit
//
printf("> %s running on %s exiting...\n", SDK_name, device_name);
printf(g_bPassed ? "Test images compared OK\n"
: "Test images are Different!\n");
}
//-----------------------------------------------------------------------------
// Name: InitD3D()
// Desc: Initializes Direct3D
//-----------------------------------------------------------------------------
HRESULT InitD3D(HWND hWnd) {
// Set up the structure used to create the device and swapchain
DXGI_SWAP_CHAIN_DESC sd;
ZeroMemory(&sd, sizeof(sd));
sd.BufferCount = 1;
sd.BufferDesc.Width = g_WindowWidth;
sd.BufferDesc.Height = g_WindowHeight;
sd.BufferDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
sd.BufferDesc.RefreshRate.Numerator = 60;
sd.BufferDesc.RefreshRate.Denominator = 1;
sd.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT;
sd.OutputWindow = hWnd;
sd.SampleDesc.Count = 1;
sd.SampleDesc.Quality = 0;
sd.Windowed = TRUE;
// Create device and swapchain
HRESULT hr = sFnPtr_D3D10CreateDeviceAndSwapChain(
g_pCudaCapableAdapter, D3D10_DRIVER_TYPE_HARDWARE, NULL, 0,
D3D10_SDK_VERSION, &sd, &g_pSwapChain, &g_pd3dDevice);
AssertOrQuit(SUCCEEDED(hr));
g_pCudaCapableAdapter->Release();
// Create a render target view of the swapchain
ID3D10Texture2D *pBuffer;
hr =
g_pSwapChain->GetBuffer(0, __uuidof(ID3D10Texture2D), (LPVOID *)&pBuffer);
AssertOrQuit(SUCCEEDED(hr));
hr = g_pd3dDevice->CreateRenderTargetView(pBuffer, NULL, &g_pSwapChainRTV);
AssertOrQuit(SUCCEEDED(hr));
pBuffer->Release();
g_pd3dDevice->OMSetRenderTargets(1, &g_pSwapChainRTV, NULL);
// Setup the viewport
D3D10_VIEWPORT vp;
vp.Width = g_WindowWidth;
vp.Height = g_WindowHeight;
vp.MinDepth = 0.0f;
vp.MaxDepth = 1.0f;
vp.TopLeftX = 0;
vp.TopLeftY = 0;
g_pd3dDevice->RSSetViewports(1, &vp);
// Setup the effect
{
ID3D10Blob *pCompiledEffect;
ID3D10Blob *pErrors = NULL;
hr = sFnPtr_D3D10CompileEffectFromMemory((void *)g_simpleEffectSrc,
sizeof(g_simpleEffectSrc), NULL,
NULL, // pDefines
NULL, // pIncludes
0, // HLSL flags
0, // FXFlags
&pCompiledEffect, &pErrors);
if (pErrors) {
LPVOID l_pError = NULL;
l_pError = pErrors->GetBufferPointer(); // then cast to a char* to see it
// in the locals window
fprintf(stdout, "Compilation error: \n %s", (char *)l_pError);
}
AssertOrQuit(SUCCEEDED(hr));
hr = sFnPtr_D3D10CreateEffectFromMemory(
pCompiledEffect->GetBufferPointer(), pCompiledEffect->GetBufferSize(),
0, // FXFlags
g_pd3dDevice, NULL, &g_pSimpleEffect);
pCompiledEffect->Release();
g_pSimpleTechnique = g_pSimpleEffect->GetTechniqueByName("Render");
g_pvQuadRect =
g_pSimpleEffect->GetVariableByName("g_vQuadRect")->AsVector();
g_pTexture2D =
g_pSimpleEffect->GetVariableByName("g_Texture2D")->AsShaderResource();
// Setup no Input Layout
g_pd3dDevice->IASetInputLayout(0);
g_pd3dDevice->IASetPrimitiveTopology(
D3D10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
}
D3D10_RASTERIZER_DESC rasterizerState;
rasterizerState.FillMode = D3D10_FILL_SOLID;
rasterizerState.CullMode = D3D10_CULL_FRONT;
rasterizerState.FrontCounterClockwise = false;
rasterizerState.DepthBias = false;
rasterizerState.DepthBiasClamp = 0;
rasterizerState.SlopeScaledDepthBias = 0;
rasterizerState.DepthClipEnable = false;
rasterizerState.ScissorEnable = false;
rasterizerState.MultisampleEnable = false;
rasterizerState.AntialiasedLineEnable = false;
g_pd3dDevice->CreateRasterizerState(&rasterizerState, &g_pRasterState);
g_pd3dDevice->RSSetState(g_pRasterState);
return S_OK;
}
//-----------------------------------------------------------------------------
// Name: InitTextures()
// Desc: Initializes Direct3D Textures (allocation and initialization)
//-----------------------------------------------------------------------------
HRESULT InitTextures() {
//
// create the D3D resources we'll be using
//
// 2D texture
{
g_texture_2d.width = 768;
g_texture_2d.height = 768;
D3D10_TEXTURE2D_DESC desc;
ZeroMemory(&desc, sizeof(D3D10_TEXTURE2D_DESC));
desc.Width = g_texture_2d.width;
desc.Height = g_texture_2d.height;
desc.MipLevels = 1;
desc.ArraySize = 1;
desc.Format = DXGI_FORMAT_R32G32B32A32_FLOAT;
desc.SampleDesc.Count = 1;
desc.Usage = D3D10_USAGE_DEFAULT;
desc.BindFlags = D3D10_BIND_SHADER_RESOURCE;
if (FAILED(
g_pd3dDevice->CreateTexture2D(&desc, NULL, &g_texture_2d.pTexture)))
return E_FAIL;
if (FAILED(g_pd3dDevice->CreateShaderResourceView(
g_texture_2d.pTexture, NULL, &g_texture_2d.pSRView)))
return E_FAIL;
g_pTexture2D->SetResource(g_texture_2d.pSRView);
}
return S_OK;
}
////////////////////////////////////////////////////////////////////////////////
//! Run the Cuda part of the computation
////////////////////////////////////////////////////////////////////////////////
int RunKernels(CudaContextData *currentContextData) {
static float t = 0.0f;
// populate the 2d texture
{
cudaArray *cuArray;
cudaGraphicsSubResourceGetMappedArray(
&cuArray, currentContextData->cudaResource, 0, 0);
getLastCudaError(
"cudaGraphicsSubResourceGetMappedArray (cuda_texture_2d) failed");
// kick off the kernel and send the staging buffer cudaLinearMemory as an
// argument to allow the kernel to write to it
cuda_texture_2d(currentContextData->cudaLinearMemory, g_texture_2d.width,
g_texture_2d.height, g_texture_2d.pitch,
g_bQAReadback ? 0.2f : t);
getLastCudaError("cuda_texture_2d failed");
// then we want to copy cudaLinearMemory to the D3D texture, via its mapped
// form : cudaArray
cudaMemcpy2DToArray(
cuArray, // dst array
0, 0, // offset
currentContextData->cudaLinearMemory, g_texture_2d.pitch, // src
g_texture_2d.width * 4 * sizeof(float), g_texture_2d.height, // extent
cudaMemcpyDeviceToDevice); // kind
getLastCudaError("cudaMemcpy2DToArray failed");
}
t += 0.1f;
return 0;
}
////////////////////////////////////////////////////////////////////////////////
//! Draw the final result on the screen
////////////////////////////////////////////////////////////////////////////////
void DrawScene() {
// Clear the backbuffer to a black color
float ClearColor[4] = {0.5f, 0.5f, 0.6f, 1.0f};
g_pd3dDevice->ClearRenderTargetView(g_pSwapChainRTV, ClearColor);
//
// draw the 2d texture
//
float quadRect[4] = {-0.98f, -0.98f, 1.96f, 1.96f};
g_pvQuadRect->SetFloatVector((float *)&quadRect);
g_pSimpleTechnique->GetPassByIndex(0)->Apply(0);
g_pd3dDevice->Draw(4, 0);
// Present the backbuffer contents to the display
g_pSwapChain->Present(0, 0);
}
//-----------------------------------------------------------------------------
// Name: Cleanup()
// Desc: Releases all previously initialized objects
//-----------------------------------------------------------------------------
int Cleanup() {
// unregister the Cuda resources
CUresult result = CUDA_SUCCESS;
cudaError_t error = cudaSuccess;
// Drop the D3D resources' refcounts
// Unregister the texture with all contexts
for (UINT i = 0; i < g_ContextCount; ++i) {
printf("Unregistering texture with context %d\n", i);
result = cuCtxPushCurrent(g_ContextData[i].context);
AssertOrQuit(CUDA_SUCCESS == result);
{
// Register the resource
error = cudaGraphicsUnregisterResource(g_ContextData[i].cudaResource);
AssertOrQuit(cudaSuccess == error);
}
result = cuCtxPopCurrent(&g_ContextData[i].context);
AssertOrQuit(CUDA_SUCCESS == result);
}
// Destroy all contexts
for (UINT i = 0; i < g_ContextCount; ++i) {
printf("Destroying context %d\n", i);
result = cuCtxPushCurrent(g_ContextData[i].context);
AssertOrQuit(CUDA_SUCCESS == result);
}
//
// clean up Direct3D
//
{
// release the resources we created
g_texture_2d.pSRView->Release();
g_texture_2d.pTexture->Release();
if (g_pInputLayout != NULL) g_pInputLayout->Release();
if (g_pSimpleEffect != NULL) g_pSimpleEffect->Release();
if (g_pSwapChainRTV != NULL) g_pSwapChainRTV->Release();
if (g_pSwapChain != NULL) g_pSwapChain->Release();
if (g_pd3dDevice != NULL) g_pd3dDevice->Release();
}
return 0;
}
//-----------------------------------------------------------------------------
// Name: Render()
// Desc: Launches the CUDA kernels to fill in the texture data
//-----------------------------------------------------------------------------
int Render() {
//
// map the resources we've registered so we can access them in Cuda
// - it is most efficient to map and unmap all resources in a single call,
// and to have the map/unmap calls be the boundary between using the GPU
// for Direct3D and Cuda
//
{
cudaStream_t stream = 0;
cudaError_t error = cudaSuccess;
CudaContextData *currentContextData = NULL;
// get the current device ordinal
static int currentDevice = -1;
error = cudaD3D10GetDevices(NULL, &currentDevice, 1, g_pd3dDevice,
cudaD3D10DeviceListCurrentFrame);
printLastCudaError("cudaD3D10GetDevices failed"); // This prints and resets
// the cudaError to
// cudaSuccess
AssertOrQuit(cudaSuccess == error);
static int nextDevice = -1;
// assert that querying the next device in AFR isn't broken
AssertOrQuit(nextDevice == -1 || nextDevice == currentDevice);
error = cudaD3D10GetDevices(NULL, &nextDevice, 1, g_pd3dDevice,
cudaD3D10DeviceListNextFrame);
printLastCudaError("cudaD3D10GetDevices failed"); // This prints and resets
// the cudaError to
// cudaSuccess
AssertOrQuit(cudaSuccess == error);
// choose context data corresponding to the current device ordinal
for (UINT i = 0; i < g_ContextCount; ++i) {
if (currentDevice == g_ContextData[i].deviceOrdinal) {
currentContextData = &g_ContextData[i];
}
}
AssertOrQuit(currentContextData);
CUresult result;
result = cuCtxPushCurrent(currentContextData->context);
AssertOrQuit(CUDA_SUCCESS == result);
cudaGraphicsMapResources(1, &currentContextData->cudaResource, stream);
getLastCudaError("cudaGraphicsMapResources(3) failed");
//
// run kernels which will populate the contents of those textures
//
RunKernels(currentContextData);
//
// unmap the resources
//
cudaGraphicsUnmapResources(1, &currentContextData->cudaResource, stream);
getLastCudaError("cudaGraphicsUnmapResources(3) failed");
CUcontext poppedContext;
result = cuCtxPopCurrent(&poppedContext);
AssertOrQuit(CUDA_SUCCESS == result);
AssertOrQuit(poppedContext == currentContextData->context);
}
//
// draw the scene using them
//
DrawScene();
return 0;
}
//-----------------------------------------------------------------------------
// Name: MsgProc()
// Desc: The window's message handler
//-----------------------------------------------------------------------------
static LRESULT WINAPI MsgProc(HWND hWnd, UINT msg, WPARAM wParam,
LPARAM lParam) {
switch (msg) {
case WM_KEYDOWN:
if (wParam == VK_ESCAPE) {
g_bDone = true;
Cleanup();
PostQuitMessage(0);
return 0;
}
break;
case WM_DESTROY:
g_bDone = true;
Cleanup();
PostQuitMessage(0);
return 0;
case WM_PAINT:
ValidateRect(hWnd, NULL);
return 0;
}
return DefWindowProc(hWnd, msg, wParam, lParam);
}