mirror of
https://github.com/NVIDIA/cuda-samples.git
synced 2025-07-02 05:10:31 +08:00
Remove legacy Direct3D 9 and 10 interoperability samples
This commit is contained in:
parent
37922e6429
commit
035dcfd357
@ -15,6 +15,15 @@
|
||||
* `cuHook` demonstrating dlsym hooks. (reason: incompatible with modern `glibc`)
|
||||
* `4_CUDA_Libraries`
|
||||
* `batchedLabelMarkersAndLabelCompressionNPP` demonstrating NPP features (reason: some functionality removed from library)
|
||||
* `5_Domain_Specific`
|
||||
* Legacy Direct3D 9 and 10 interoperability samples:
|
||||
* `simpleD3D10`
|
||||
* `simpleD3D10RenderTarget`
|
||||
* `simpleD3D10Texture`
|
||||
* `simpleD3D9`
|
||||
* `simpleD3D9Texture`
|
||||
* `SLID3D10Texture`
|
||||
* `VFlockingD3D10`
|
||||
|
||||
|
||||
|
||||
|
@ -1,46 +0,0 @@
|
||||
################################################################################
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met:
|
||||
# * Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
# contributors may be used to endorse or promote products derived
|
||||
# from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
################################################################################
|
||||
#
|
||||
# Makefile project only supported on Mac OS X and Linux Platforms)
|
||||
#
|
||||
################################################################################
|
||||
|
||||
# Target rules
|
||||
all: build
|
||||
|
||||
build:
|
||||
$(info >>> WARNING - SLID3D10Texture is not supported on Linux - waiving sample <<<)
|
||||
|
||||
run: build
|
||||
|
||||
testrun: build
|
||||
|
||||
clean:
|
||||
|
||||
clobber: clean
|
@ -1,52 +0,0 @@
|
||||
# SLID3D10Texture - SLI D3D10 Texture
|
||||
|
||||
## Description
|
||||
|
||||
Simple program which demonstrates SLI with Direct3D10 Texture interoperability with CUDA. The program creates a D3D10 Texture which is written to from a CUDA kernel. Direct3D then renders the results on the screen. A Direct3D Capable device is required.
|
||||
|
||||
## Key Concepts
|
||||
|
||||
Performance Strategies, Graphics Interop, Image Processing, 2D Textures
|
||||
|
||||
## Supported SM Architectures
|
||||
|
||||
[SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 8.9 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
|
||||
## Supported OSes
|
||||
|
||||
Windows
|
||||
|
||||
## Supported CPU Architecture
|
||||
|
||||
x86_64
|
||||
|
||||
## CUDA APIs involved
|
||||
|
||||
### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
|
||||
cuCtxPushCurrent, cuCtxPopCurrent
|
||||
|
||||
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
|
||||
cudaGraphicsUnmapResources, cudaMalloc, cudaMallocPitch, cudaGetErrorString, cudaFree, cudaGraphicsResourceSetMapFlags, cudaGetLastError, cudaGraphicsMapResources, cudaGetDeviceCount, cudaMemset, cudaGraphicsUnregisterResource, cudaGraphicsSubResourceGetMappedArray, cudaGetDeviceProperties
|
||||
|
||||
## Dependencies needed to build/run
|
||||
[DirectX](../../../README.md#directx)
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
|
||||
|
||||
## Build and Run
|
||||
|
||||
### Windows
|
||||
The Windows samples are built using the Visual Studio IDE. Solution files (.sln) are provided for each supported version of Visual Studio, using the format:
|
||||
```
|
||||
*_vs<version>.sln - for Visual Studio <version>
|
||||
```
|
||||
Each individual sample has its own set of solution files in its directory:
|
||||
|
||||
To build/examine all the samples at once, the complete solution files should be used. To build/examine a single sample, the individual sample solution files should be used.
|
||||
> **Note:** Some samples require that the Microsoft DirectX SDK (June 2010 or newer) be installed and that the VC++ directory paths are properly set up (**Tools > Options...**). Check DirectX Dependencies section for details."
|
||||
|
||||
## References (for more details)
|
||||
|
@ -1,844 +0,0 @@
|
||||
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
//
|
||||
// This example demonstrates interoperability of SLI with a Direct3D10 texture
|
||||
// and CUDA. The program creates a D3D10 texture which is written from
|
||||
// a CUDA kernel. Direct3D then renders the result on the screen.
|
||||
// A Direct3D Capable device is required.
|
||||
//
|
||||
|
||||
#pragma warning(disable : 4312)
|
||||
|
||||
#include <windows.h>
|
||||
#include <mmsystem.h>
|
||||
|
||||
#pragma warning(disable : 4996) // disable deprecated warning
|
||||
#include <strsafe.h>
|
||||
#pragma warning(default : 4996)
|
||||
|
||||
// this header includes all the necessary D3D10 includes
|
||||
#include <dynlink_d3d10.h>
|
||||
|
||||
// includes, cuda
|
||||
#include <cuda.h>
|
||||
#include <builtin_types.h>
|
||||
#include <cuda_runtime.h>
|
||||
#include <cuda_d3d10_interop.h>
|
||||
#include <d3d10.h>
|
||||
|
||||
// includes, project
|
||||
#include <rendercheck_d3d10.h> // automated testing
|
||||
#include <helper_cuda.h> // helper for CUDA error checking and initialization
|
||||
|
||||
#define MAX_EPSILON 10
|
||||
|
||||
static char *SDK_name = "SLID3D10Texture";
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Global variables
|
||||
//-----------------------------------------------------------------------------
|
||||
IDXGIAdapter *g_pCudaCapableAdapter = NULL; // Adapter to use
|
||||
ID3D10Device *g_pd3dDevice = NULL; // Our rendering device
|
||||
IDXGISwapChain *g_pSwapChain = NULL; // The swap chain of the window
|
||||
ID3D10RenderTargetView *g_pSwapChainRTV =
|
||||
NULL; // The Render target view on the swap chain ( used for clear)
|
||||
ID3D10RasterizerState *g_pRasterState = NULL;
|
||||
|
||||
ID3D10InputLayout *g_pInputLayout = NULL;
|
||||
ID3D10Effect *g_pSimpleEffect = NULL;
|
||||
ID3D10EffectTechnique *g_pSimpleTechnique = NULL;
|
||||
ID3D10EffectVectorVariable *g_pvQuadRect = NULL;
|
||||
ID3D10EffectShaderResourceVariable *g_pTexture2D = NULL;
|
||||
|
||||
static const char g_simpleEffectSrc[] =
|
||||
"float4 g_vQuadRect; \n"
|
||||
"Texture2D g_Texture2D; \n"
|
||||
"\n"
|
||||
"SamplerState samLinear{ \n"
|
||||
" Filter = MIN_MAG_LINEAR_MIP_POINT; \n"
|
||||
"};\n"
|
||||
"\n"
|
||||
"struct Fragment{ \n"
|
||||
" float4 Pos : SV_POSITION;\n"
|
||||
" float3 Tex : TEXCOORD0; };\n"
|
||||
"\n"
|
||||
"Fragment VS( uint vertexId : SV_VertexID )\n"
|
||||
"{\n"
|
||||
" Fragment f;\n"
|
||||
" f.Tex = float3( 0.f, 0.f, 0.f); \n"
|
||||
" if (vertexId == 1) f.Tex.x = 1.f; \n"
|
||||
" else if (vertexId == 2) f.Tex.y = 1.f; \n"
|
||||
" else if (vertexId == 3) f.Tex.xy = float2(1.f, 1.f); \n"
|
||||
" \n"
|
||||
" f.Pos = float4( g_vQuadRect.xy + f.Tex * g_vQuadRect.zw, 0, 1);\n"
|
||||
" \n"
|
||||
" return f;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"float4 PS( Fragment f ) : SV_Target\n"
|
||||
"{\n"
|
||||
" // return g_Texture2D.Sample( samLinear, f.Tex.xy ); \n"
|
||||
" // return float4(f.Tex, 1);\n"
|
||||
" float4 g = g_Texture2D.Sample( samLinear, f.Tex.xy );"
|
||||
" for (int i = 0; i < 1024; ++i) { "
|
||||
" g.x = sqrt(g.x);"
|
||||
" g.x += 0.0001;"
|
||||
" g.x = g.x * g.x;"
|
||||
" }"
|
||||
" return g;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"technique10 Render\n"
|
||||
"{\n"
|
||||
" pass P0\n"
|
||||
" {\n"
|
||||
" SetVertexShader( CompileShader( vs_4_0, VS() ) );\n"
|
||||
" SetGeometryShader( NULL );\n"
|
||||
" SetPixelShader( CompileShader( ps_4_0, PS() ) );\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"\n";
|
||||
|
||||
// testing/tracing function used pervasively in tests. If the condition is
|
||||
// unsatisfied
|
||||
// then spew and fail the function immediately (doing no cleanup)
|
||||
#define AssertOrQuit(x) \
|
||||
if (!(x)) { \
|
||||
fprintf(stdout, "Assert unsatisfied in %s at %s:%d\n", __FUNCTION__, \
|
||||
__FILE__, __LINE__); \
|
||||
return 1; \
|
||||
}
|
||||
|
||||
bool g_bDone = false;
|
||||
bool g_bPassed = true;
|
||||
|
||||
int *pArgc = NULL;
|
||||
char **pArgv = NULL;
|
||||
|
||||
const unsigned int g_WindowWidth = 720;
|
||||
const unsigned int g_WindowHeight = 720;
|
||||
|
||||
bool g_bQAReadback = false;
|
||||
int g_iFrameToCompare = 1;
|
||||
|
||||
struct CudaContextData {
|
||||
UINT index;
|
||||
CUcontext context;
|
||||
int deviceOrdinal;
|
||||
cudaGraphicsResource *cudaResource;
|
||||
void *cudaLinearMemory;
|
||||
};
|
||||
|
||||
UINT g_ContextCount = 0;
|
||||
CudaContextData g_ContextData[32];
|
||||
|
||||
// Data structure for 2D texture shared between DX10 and CUDA
|
||||
struct {
|
||||
ID3D10Texture2D *pTexture;
|
||||
ID3D10ShaderResourceView *pSRView;
|
||||
size_t pitch;
|
||||
int width;
|
||||
int height;
|
||||
} g_texture_2d;
|
||||
|
||||
// The CUDA kernel launchers that get called
|
||||
extern "C" {
|
||||
bool cuda_texture_2d(void *surface, size_t width, size_t height, size_t pitch,
|
||||
float t);
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Forward declarations
|
||||
//-----------------------------------------------------------------------------
|
||||
HRESULT InitD3D(HWND hWnd);
|
||||
HRESULT InitTextures();
|
||||
|
||||
int RunKernels(CudaContextData *currentContextData);
|
||||
void DrawScene();
|
||||
int Cleanup();
|
||||
int Render();
|
||||
|
||||
LRESULT WINAPI MsgProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam);
|
||||
|
||||
#define NAME_LEN 512
|
||||
|
||||
bool findCUDADevice() {
|
||||
int nGraphicsGPU = 0;
|
||||
int deviceCount = 0;
|
||||
bool bFoundGraphics = false;
|
||||
char devname[NAME_LEN];
|
||||
|
||||
// This function call returns 0 if there are no CUDA capable devices.
|
||||
cudaError_t error_id = cudaGetDeviceCount(&deviceCount);
|
||||
|
||||
if (error_id != cudaSuccess) {
|
||||
printf("cudaGetDeviceCount returned %d\n-> %s\n", (int)error_id,
|
||||
cudaGetErrorString(error_id));
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
if (deviceCount == 0) {
|
||||
printf("> There are no device(s) supporting CUDA\n");
|
||||
return false;
|
||||
} else {
|
||||
printf("> Found %d CUDA Capable Device(s)\n", deviceCount);
|
||||
}
|
||||
|
||||
// Get CUDA device properties
|
||||
cudaDeviceProp deviceProp;
|
||||
|
||||
for (int dev = 0; dev < deviceCount; ++dev) {
|
||||
cudaGetDeviceProperties(&deviceProp, dev);
|
||||
STRCPY(devname, NAME_LEN, deviceProp.name);
|
||||
printf("> GPU %d: %s\n", dev, devname);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool findDXDevice(char *dev_name) {
|
||||
HRESULT hr = S_OK;
|
||||
cudaError cuStatus;
|
||||
|
||||
// Iterate through the candidate adapters
|
||||
IDXGIFactory *pFactory;
|
||||
hr = sFnPtr_CreateDXGIFactory(__uuidof(IDXGIFactory), (void **)(&pFactory));
|
||||
|
||||
if (!SUCCEEDED(hr)) {
|
||||
printf("> No DXGI Factory created.\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
UINT adapter = 0;
|
||||
|
||||
for (; !g_pCudaCapableAdapter; ++adapter) {
|
||||
// Get a candidate DXGI adapter
|
||||
IDXGIAdapter *pAdapter = NULL;
|
||||
hr = pFactory->EnumAdapters(adapter, &pAdapter);
|
||||
|
||||
if (FAILED(hr)) {
|
||||
break; // no compatible adapters found
|
||||
}
|
||||
|
||||
// Query to see if there exists a corresponding compute device
|
||||
int cuDevice;
|
||||
cuStatus = cudaD3D10GetDevice(&cuDevice, pAdapter);
|
||||
printLastCudaError("cudaD3D10GetDevice failed"); // This prints and resets
|
||||
// the cudaError to
|
||||
// cudaSuccess
|
||||
|
||||
if (cudaSuccess == cuStatus) {
|
||||
// If so, mark it as the one against which to create our d3d10 device
|
||||
g_pCudaCapableAdapter = pAdapter;
|
||||
g_pCudaCapableAdapter->AddRef();
|
||||
}
|
||||
|
||||
pAdapter->Release();
|
||||
}
|
||||
|
||||
printf("> Found %d D3D10 Adapater(s).\n", (int)adapter);
|
||||
|
||||
pFactory->Release();
|
||||
|
||||
if (!g_pCudaCapableAdapter) {
|
||||
printf("> Found 0 D3D10 Adapater(s) /w Compute capability.\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
DXGI_ADAPTER_DESC adapterDesc;
|
||||
g_pCudaCapableAdapter->GetDesc(&adapterDesc);
|
||||
wcstombs_s(NULL, dev_name, 256, adapterDesc.Description, 128);
|
||||
|
||||
printf("> Found 1 D3D10 Adapater(s) /w Compute capability.\n");
|
||||
printf("> %s\n", dev_name);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Program main
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
int main(int argc, char *argv[]) {
|
||||
char device_name[256];
|
||||
char *ref_file = NULL;
|
||||
|
||||
pArgc = &argc;
|
||||
pArgv = argv;
|
||||
|
||||
printf("%s Starting...\n\n", SDK_name);
|
||||
|
||||
if (!findCUDADevice()) // Search for CUDA GPU
|
||||
{
|
||||
printf("> CUDA Device NOT found on \"%s\".. Exiting.\n", device_name);
|
||||
exit(EXIT_SUCCESS);
|
||||
}
|
||||
|
||||
if (!dynlinkLoadD3D10API()) // Search for D3D API (locate drivers, does not
|
||||
// mean device is found)
|
||||
{
|
||||
printf("> D3D10 API libraries NOT found on.. Exiting.\n");
|
||||
dynlinkUnloadD3D10API();
|
||||
exit(EXIT_SUCCESS);
|
||||
}
|
||||
|
||||
if (!findDXDevice(device_name)) // Search for D3D Hardware Device
|
||||
{
|
||||
printf("> D3D10 Graphics Device NOT found.. Exiting.\n");
|
||||
dynlinkUnloadD3D10API();
|
||||
exit(EXIT_SUCCESS);
|
||||
}
|
||||
|
||||
// command line options
|
||||
if (checkCmdLineFlag(argc, (const char **)argv, "file")) {
|
||||
g_bQAReadback = true;
|
||||
getCmdLineArgumentString(argc, (const char **)argv, "file", &ref_file);
|
||||
}
|
||||
|
||||
//
|
||||
// create window
|
||||
//
|
||||
// Register the window class
|
||||
WNDCLASSEX wc = {sizeof(WNDCLASSEX),
|
||||
CS_CLASSDC,
|
||||
MsgProc,
|
||||
0L,
|
||||
0L,
|
||||
GetModuleHandle(NULL),
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
"CUDA SDK",
|
||||
NULL};
|
||||
RegisterClassEx(&wc);
|
||||
|
||||
// Create the application's window
|
||||
HWND hWnd = CreateWindow(wc.lpszClassName, "CUDA-SLI Interop, D3D10",
|
||||
WS_OVERLAPPEDWINDOW, 0, 0, g_WindowWidth,
|
||||
g_WindowHeight, NULL, NULL, wc.hInstance, NULL);
|
||||
|
||||
ShowWindow(hWnd, SW_SHOWDEFAULT);
|
||||
UpdateWindow(hWnd);
|
||||
|
||||
// Initialize Direct3D
|
||||
if (SUCCEEDED(InitD3D(hWnd)) && SUCCEEDED(InitTextures())) {
|
||||
CUresult result = CUDA_SUCCESS;
|
||||
cudaError_t error = cudaSuccess;
|
||||
|
||||
// get the list of interop devices
|
||||
{
|
||||
unsigned int interopDeviceCount = 0;
|
||||
int interopDevices[32];
|
||||
error = cudaD3D10GetDevices(&interopDeviceCount, interopDevices, 32,
|
||||
g_pd3dDevice, cudaD3D10DeviceListAll);
|
||||
printLastCudaError("cudaD3D10GetDevices failed"); // This prints and
|
||||
// resets the cudaError
|
||||
// to cudaSuccess
|
||||
AssertOrQuit(cudaSuccess == error);
|
||||
|
||||
g_ContextCount = interopDeviceCount;
|
||||
|
||||
for (UINT i = 0; i < interopDeviceCount; ++i) {
|
||||
g_ContextData[i].index = i;
|
||||
g_ContextData[i].deviceOrdinal = interopDevices[i];
|
||||
}
|
||||
}
|
||||
|
||||
// Initialize g_ContextCount interop contexts on the device,
|
||||
// striping across AFR groups
|
||||
for (UINT i = 0; i < g_ContextCount; ++i) {
|
||||
printf("Creating context %d on device %d\n", g_ContextData[i].index,
|
||||
g_ContextData[i].deviceOrdinal);
|
||||
|
||||
// create a context
|
||||
error = cudaD3D10SetDirect3DDevice(g_pd3dDevice,
|
||||
g_ContextData[i].deviceOrdinal);
|
||||
AssertOrQuit(cudaSuccess == error);
|
||||
error = cudaFree(0);
|
||||
AssertOrQuit(cudaSuccess == error);
|
||||
|
||||
// allocate a buffer
|
||||
// error = cudaMalloc((void**)&g_ContextData[i].buffer, BYTES_PER_PIXEL);
|
||||
cudaMallocPitch(&g_ContextData[i].cudaLinearMemory, &g_texture_2d.pitch,
|
||||
g_texture_2d.width * sizeof(float) * 4,
|
||||
g_texture_2d.height);
|
||||
getLastCudaError("cudaMallocPitch (g_texture_2d) failed");
|
||||
cudaMemset(g_ContextData[i].cudaLinearMemory, 1,
|
||||
g_texture_2d.pitch * g_texture_2d.height);
|
||||
|
||||
AssertOrQuit(cudaSuccess == error);
|
||||
|
||||
// pop the context
|
||||
result = cuCtxPopCurrent(&g_ContextData[i].context);
|
||||
AssertOrQuit(CUDA_SUCCESS == result);
|
||||
}
|
||||
|
||||
// Register the texture with all contexts
|
||||
for (UINT i = 0; i < g_ContextCount; ++i) {
|
||||
printf("Registering texture with context %d\n", i);
|
||||
result = cuCtxPushCurrent(g_ContextData[i].context);
|
||||
AssertOrQuit(CUDA_SUCCESS == result);
|
||||
{
|
||||
// Register the resource
|
||||
error = cudaGraphicsD3D10RegisterResource(
|
||||
&g_ContextData[i].cudaResource, g_texture_2d.pTexture,
|
||||
cudaGraphicsRegisterFlagsNone);
|
||||
getLastCudaError(
|
||||
"cudaGraphicsD3D10RegisterResource (g_texture_2d) failed");
|
||||
|
||||
error = cudaGraphicsResourceSetMapFlags(g_ContextData[i].cudaResource,
|
||||
cudaD3D10MapFlagsWriteDiscard);
|
||||
getLastCudaError(
|
||||
"cudaGraphicsResourceSetMapFlags (g_texture_2d) failed");
|
||||
|
||||
AssertOrQuit(cudaSuccess == error);
|
||||
}
|
||||
result = cuCtxPopCurrent(&g_ContextData[i].context);
|
||||
AssertOrQuit(CUDA_SUCCESS == result);
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// the main loop
|
||||
//
|
||||
while (false == g_bDone) {
|
||||
Render();
|
||||
|
||||
//
|
||||
// handle I/O
|
||||
//
|
||||
MSG msg;
|
||||
ZeroMemory(&msg, sizeof(msg));
|
||||
|
||||
while (msg.message != WM_QUIT) {
|
||||
if (PeekMessage(&msg, NULL, 0U, 0U, PM_REMOVE)) {
|
||||
TranslateMessage(&msg);
|
||||
DispatchMessage(&msg);
|
||||
} else {
|
||||
Render();
|
||||
|
||||
if (ref_file) {
|
||||
for (int count = 0; count < g_iFrameToCompare; count++) {
|
||||
Render();
|
||||
}
|
||||
|
||||
const char *cur_image_path = "SLID3D10Texture.ppm";
|
||||
|
||||
// Save a reference of our current test run image
|
||||
CheckRenderD3D10::ActiveRenderTargetToPPM(g_pd3dDevice,
|
||||
cur_image_path);
|
||||
|
||||
// compare to offical reference image, printing PASS or FAIL.
|
||||
g_bPassed = CheckRenderD3D10::PPMvsPPM(cur_image_path, ref_file,
|
||||
argv[0], MAX_EPSILON, 0.15f);
|
||||
|
||||
g_bDone = true;
|
||||
|
||||
Cleanup();
|
||||
|
||||
PostQuitMessage(0);
|
||||
} else {
|
||||
g_bPassed = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Unregister windows class
|
||||
UnregisterClass(wc.lpszClassName, wc.hInstance);
|
||||
|
||||
//
|
||||
// and exit
|
||||
//
|
||||
printf("> %s running on %s exiting...\n", SDK_name, device_name);
|
||||
|
||||
printf(g_bPassed ? "Test images compared OK\n"
|
||||
: "Test images are Different!\n");
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Name: InitD3D()
|
||||
// Desc: Initializes Direct3D
|
||||
//-----------------------------------------------------------------------------
|
||||
HRESULT InitD3D(HWND hWnd) {
|
||||
// Set up the structure used to create the device and swapchain
|
||||
DXGI_SWAP_CHAIN_DESC sd;
|
||||
ZeroMemory(&sd, sizeof(sd));
|
||||
sd.BufferCount = 1;
|
||||
sd.BufferDesc.Width = g_WindowWidth;
|
||||
sd.BufferDesc.Height = g_WindowHeight;
|
||||
sd.BufferDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
|
||||
sd.BufferDesc.RefreshRate.Numerator = 60;
|
||||
sd.BufferDesc.RefreshRate.Denominator = 1;
|
||||
sd.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT;
|
||||
sd.OutputWindow = hWnd;
|
||||
sd.SampleDesc.Count = 1;
|
||||
sd.SampleDesc.Quality = 0;
|
||||
sd.Windowed = TRUE;
|
||||
|
||||
// Create device and swapchain
|
||||
HRESULT hr = sFnPtr_D3D10CreateDeviceAndSwapChain(
|
||||
g_pCudaCapableAdapter, D3D10_DRIVER_TYPE_HARDWARE, NULL, 0,
|
||||
D3D10_SDK_VERSION, &sd, &g_pSwapChain, &g_pd3dDevice);
|
||||
AssertOrQuit(SUCCEEDED(hr));
|
||||
g_pCudaCapableAdapter->Release();
|
||||
|
||||
// Create a render target view of the swapchain
|
||||
ID3D10Texture2D *pBuffer;
|
||||
hr =
|
||||
g_pSwapChain->GetBuffer(0, __uuidof(ID3D10Texture2D), (LPVOID *)&pBuffer);
|
||||
AssertOrQuit(SUCCEEDED(hr));
|
||||
|
||||
hr = g_pd3dDevice->CreateRenderTargetView(pBuffer, NULL, &g_pSwapChainRTV);
|
||||
AssertOrQuit(SUCCEEDED(hr));
|
||||
pBuffer->Release();
|
||||
|
||||
g_pd3dDevice->OMSetRenderTargets(1, &g_pSwapChainRTV, NULL);
|
||||
|
||||
// Setup the viewport
|
||||
D3D10_VIEWPORT vp;
|
||||
vp.Width = g_WindowWidth;
|
||||
vp.Height = g_WindowHeight;
|
||||
vp.MinDepth = 0.0f;
|
||||
vp.MaxDepth = 1.0f;
|
||||
vp.TopLeftX = 0;
|
||||
vp.TopLeftY = 0;
|
||||
g_pd3dDevice->RSSetViewports(1, &vp);
|
||||
|
||||
// Setup the effect
|
||||
{
|
||||
ID3D10Blob *pCompiledEffect;
|
||||
ID3D10Blob *pErrors = NULL;
|
||||
hr = sFnPtr_D3D10CompileEffectFromMemory((void *)g_simpleEffectSrc,
|
||||
sizeof(g_simpleEffectSrc), NULL,
|
||||
NULL, // pDefines
|
||||
NULL, // pIncludes
|
||||
0, // HLSL flags
|
||||
0, // FXFlags
|
||||
&pCompiledEffect, &pErrors);
|
||||
|
||||
if (pErrors) {
|
||||
LPVOID l_pError = NULL;
|
||||
l_pError = pErrors->GetBufferPointer(); // then cast to a char* to see it
|
||||
// in the locals window
|
||||
fprintf(stdout, "Compilation error: \n %s", (char *)l_pError);
|
||||
}
|
||||
|
||||
AssertOrQuit(SUCCEEDED(hr));
|
||||
|
||||
hr = sFnPtr_D3D10CreateEffectFromMemory(
|
||||
pCompiledEffect->GetBufferPointer(), pCompiledEffect->GetBufferSize(),
|
||||
0, // FXFlags
|
||||
g_pd3dDevice, NULL, &g_pSimpleEffect);
|
||||
pCompiledEffect->Release();
|
||||
|
||||
g_pSimpleTechnique = g_pSimpleEffect->GetTechniqueByName("Render");
|
||||
|
||||
g_pvQuadRect =
|
||||
g_pSimpleEffect->GetVariableByName("g_vQuadRect")->AsVector();
|
||||
|
||||
g_pTexture2D =
|
||||
g_pSimpleEffect->GetVariableByName("g_Texture2D")->AsShaderResource();
|
||||
|
||||
// Setup no Input Layout
|
||||
g_pd3dDevice->IASetInputLayout(0);
|
||||
g_pd3dDevice->IASetPrimitiveTopology(
|
||||
D3D10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
|
||||
}
|
||||
|
||||
D3D10_RASTERIZER_DESC rasterizerState;
|
||||
rasterizerState.FillMode = D3D10_FILL_SOLID;
|
||||
rasterizerState.CullMode = D3D10_CULL_FRONT;
|
||||
rasterizerState.FrontCounterClockwise = false;
|
||||
rasterizerState.DepthBias = false;
|
||||
rasterizerState.DepthBiasClamp = 0;
|
||||
rasterizerState.SlopeScaledDepthBias = 0;
|
||||
rasterizerState.DepthClipEnable = false;
|
||||
rasterizerState.ScissorEnable = false;
|
||||
rasterizerState.MultisampleEnable = false;
|
||||
rasterizerState.AntialiasedLineEnable = false;
|
||||
g_pd3dDevice->CreateRasterizerState(&rasterizerState, &g_pRasterState);
|
||||
g_pd3dDevice->RSSetState(g_pRasterState);
|
||||
|
||||
return S_OK;
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Name: InitTextures()
|
||||
// Desc: Initializes Direct3D Textures (allocation and initialization)
|
||||
//-----------------------------------------------------------------------------
|
||||
HRESULT InitTextures() {
|
||||
//
|
||||
// create the D3D resources we'll be using
|
||||
//
|
||||
// 2D texture
|
||||
{
|
||||
g_texture_2d.width = 768;
|
||||
g_texture_2d.height = 768;
|
||||
|
||||
D3D10_TEXTURE2D_DESC desc;
|
||||
ZeroMemory(&desc, sizeof(D3D10_TEXTURE2D_DESC));
|
||||
desc.Width = g_texture_2d.width;
|
||||
desc.Height = g_texture_2d.height;
|
||||
desc.MipLevels = 1;
|
||||
desc.ArraySize = 1;
|
||||
desc.Format = DXGI_FORMAT_R32G32B32A32_FLOAT;
|
||||
desc.SampleDesc.Count = 1;
|
||||
desc.Usage = D3D10_USAGE_DEFAULT;
|
||||
desc.BindFlags = D3D10_BIND_SHADER_RESOURCE;
|
||||
|
||||
if (FAILED(
|
||||
g_pd3dDevice->CreateTexture2D(&desc, NULL, &g_texture_2d.pTexture)))
|
||||
return E_FAIL;
|
||||
|
||||
if (FAILED(g_pd3dDevice->CreateShaderResourceView(
|
||||
g_texture_2d.pTexture, NULL, &g_texture_2d.pSRView)))
|
||||
return E_FAIL;
|
||||
|
||||
g_pTexture2D->SetResource(g_texture_2d.pSRView);
|
||||
}
|
||||
|
||||
return S_OK;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Run the Cuda part of the computation
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
int RunKernels(CudaContextData *currentContextData) {
|
||||
static float t = 0.0f;
|
||||
|
||||
// populate the 2d texture
|
||||
{
|
||||
cudaArray *cuArray;
|
||||
cudaGraphicsSubResourceGetMappedArray(
|
||||
&cuArray, currentContextData->cudaResource, 0, 0);
|
||||
getLastCudaError(
|
||||
"cudaGraphicsSubResourceGetMappedArray (cuda_texture_2d) failed");
|
||||
|
||||
// kick off the kernel and send the staging buffer cudaLinearMemory as an
|
||||
// argument to allow the kernel to write to it
|
||||
cuda_texture_2d(currentContextData->cudaLinearMemory, g_texture_2d.width,
|
||||
g_texture_2d.height, g_texture_2d.pitch,
|
||||
g_bQAReadback ? 0.2f : t);
|
||||
getLastCudaError("cuda_texture_2d failed");
|
||||
|
||||
// then we want to copy cudaLinearMemory to the D3D texture, via its mapped
|
||||
// form : cudaArray
|
||||
cudaMemcpy2DToArray(
|
||||
cuArray, // dst array
|
||||
0, 0, // offset
|
||||
currentContextData->cudaLinearMemory, g_texture_2d.pitch, // src
|
||||
g_texture_2d.width * 4 * sizeof(float), g_texture_2d.height, // extent
|
||||
cudaMemcpyDeviceToDevice); // kind
|
||||
getLastCudaError("cudaMemcpy2DToArray failed");
|
||||
}
|
||||
t += 0.1f;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Draw the final result on the screen
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
void DrawScene() {
|
||||
// Clear the backbuffer to a black color
|
||||
float ClearColor[4] = {0.5f, 0.5f, 0.6f, 1.0f};
|
||||
g_pd3dDevice->ClearRenderTargetView(g_pSwapChainRTV, ClearColor);
|
||||
|
||||
//
|
||||
// draw the 2d texture
|
||||
//
|
||||
float quadRect[4] = {-0.98f, -0.98f, 1.96f, 1.96f};
|
||||
g_pvQuadRect->SetFloatVector((float *)&quadRect);
|
||||
g_pSimpleTechnique->GetPassByIndex(0)->Apply(0);
|
||||
g_pd3dDevice->Draw(4, 0);
|
||||
|
||||
// Present the backbuffer contents to the display
|
||||
g_pSwapChain->Present(0, 0);
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Name: Cleanup()
|
||||
// Desc: Releases all previously initialized objects
|
||||
//-----------------------------------------------------------------------------
|
||||
int Cleanup() {
|
||||
// unregister the Cuda resources
|
||||
CUresult result = CUDA_SUCCESS;
|
||||
cudaError_t error = cudaSuccess;
|
||||
|
||||
// Drop the D3D resources' refcounts
|
||||
// Unregister the texture with all contexts
|
||||
for (UINT i = 0; i < g_ContextCount; ++i) {
|
||||
printf("Unregistering texture with context %d\n", i);
|
||||
result = cuCtxPushCurrent(g_ContextData[i].context);
|
||||
AssertOrQuit(CUDA_SUCCESS == result);
|
||||
{
|
||||
// Register the resource
|
||||
error = cudaGraphicsUnregisterResource(g_ContextData[i].cudaResource);
|
||||
AssertOrQuit(cudaSuccess == error);
|
||||
}
|
||||
result = cuCtxPopCurrent(&g_ContextData[i].context);
|
||||
AssertOrQuit(CUDA_SUCCESS == result);
|
||||
}
|
||||
|
||||
// Destroy all contexts
|
||||
for (UINT i = 0; i < g_ContextCount; ++i) {
|
||||
printf("Destroying context %d\n", i);
|
||||
result = cuCtxPushCurrent(g_ContextData[i].context);
|
||||
AssertOrQuit(CUDA_SUCCESS == result);
|
||||
}
|
||||
|
||||
//
|
||||
// clean up Direct3D
|
||||
//
|
||||
{
|
||||
// release the resources we created
|
||||
g_texture_2d.pSRView->Release();
|
||||
g_texture_2d.pTexture->Release();
|
||||
|
||||
if (g_pInputLayout != NULL) g_pInputLayout->Release();
|
||||
|
||||
if (g_pSimpleEffect != NULL) g_pSimpleEffect->Release();
|
||||
|
||||
if (g_pSwapChainRTV != NULL) g_pSwapChainRTV->Release();
|
||||
|
||||
if (g_pSwapChain != NULL) g_pSwapChain->Release();
|
||||
|
||||
if (g_pd3dDevice != NULL) g_pd3dDevice->Release();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Name: Render()
|
||||
// Desc: Launches the CUDA kernels to fill in the texture data
|
||||
//-----------------------------------------------------------------------------
|
||||
int Render() {
|
||||
//
|
||||
// map the resources we've registered so we can access them in Cuda
|
||||
// - it is most efficient to map and unmap all resources in a single call,
|
||||
// and to have the map/unmap calls be the boundary between using the GPU
|
||||
// for Direct3D and Cuda
|
||||
//
|
||||
{
|
||||
cudaStream_t stream = 0;
|
||||
|
||||
cudaError_t error = cudaSuccess;
|
||||
CudaContextData *currentContextData = NULL;
|
||||
|
||||
// get the current device ordinal
|
||||
static int currentDevice = -1;
|
||||
error = cudaD3D10GetDevices(NULL, ¤tDevice, 1, g_pd3dDevice,
|
||||
cudaD3D10DeviceListCurrentFrame);
|
||||
printLastCudaError("cudaD3D10GetDevices failed"); // This prints and resets
|
||||
// the cudaError to
|
||||
// cudaSuccess
|
||||
AssertOrQuit(cudaSuccess == error);
|
||||
|
||||
static int nextDevice = -1;
|
||||
// assert that querying the next device in AFR isn't broken
|
||||
AssertOrQuit(nextDevice == -1 || nextDevice == currentDevice);
|
||||
error = cudaD3D10GetDevices(NULL, &nextDevice, 1, g_pd3dDevice,
|
||||
cudaD3D10DeviceListNextFrame);
|
||||
printLastCudaError("cudaD3D10GetDevices failed"); // This prints and resets
|
||||
// the cudaError to
|
||||
// cudaSuccess
|
||||
AssertOrQuit(cudaSuccess == error);
|
||||
|
||||
// choose context data corresponding to the current device ordinal
|
||||
for (UINT i = 0; i < g_ContextCount; ++i) {
|
||||
if (currentDevice == g_ContextData[i].deviceOrdinal) {
|
||||
currentContextData = &g_ContextData[i];
|
||||
}
|
||||
}
|
||||
|
||||
AssertOrQuit(currentContextData);
|
||||
|
||||
CUresult result;
|
||||
result = cuCtxPushCurrent(currentContextData->context);
|
||||
AssertOrQuit(CUDA_SUCCESS == result);
|
||||
|
||||
cudaGraphicsMapResources(1, ¤tContextData->cudaResource, stream);
|
||||
getLastCudaError("cudaGraphicsMapResources(3) failed");
|
||||
|
||||
//
|
||||
// run kernels which will populate the contents of those textures
|
||||
//
|
||||
RunKernels(currentContextData);
|
||||
|
||||
//
|
||||
// unmap the resources
|
||||
//
|
||||
cudaGraphicsUnmapResources(1, ¤tContextData->cudaResource, stream);
|
||||
getLastCudaError("cudaGraphicsUnmapResources(3) failed");
|
||||
|
||||
CUcontext poppedContext;
|
||||
result = cuCtxPopCurrent(&poppedContext);
|
||||
AssertOrQuit(CUDA_SUCCESS == result);
|
||||
AssertOrQuit(poppedContext == currentContextData->context);
|
||||
}
|
||||
|
||||
//
|
||||
// draw the scene using them
|
||||
//
|
||||
DrawScene();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Name: MsgProc()
|
||||
// Desc: The window's message handler
|
||||
//-----------------------------------------------------------------------------
|
||||
static LRESULT WINAPI MsgProc(HWND hWnd, UINT msg, WPARAM wParam,
|
||||
LPARAM lParam) {
|
||||
switch (msg) {
|
||||
case WM_KEYDOWN:
|
||||
if (wParam == VK_ESCAPE) {
|
||||
g_bDone = true;
|
||||
Cleanup();
|
||||
PostQuitMessage(0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
case WM_DESTROY:
|
||||
g_bDone = true;
|
||||
Cleanup();
|
||||
PostQuitMessage(0);
|
||||
return 0;
|
||||
|
||||
case WM_PAINT:
|
||||
ValidateRect(hWnd, NULL);
|
||||
return 0;
|
||||
}
|
||||
|
||||
return DefWindowProc(hWnd, msg, wParam, lParam);
|
||||
}
|
File diff suppressed because one or more lines are too long
@ -1,88 +0,0 @@
|
||||
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
//
|
||||
// Paint a 2D texture with a moving red/green hatch pattern on a
|
||||
// strobing blue background. Note that this kernel reads to and
|
||||
// writes from the texture, hence why this texture was not mapped
|
||||
// as WriteDiscard.
|
||||
//
|
||||
__global__ void cuda_kernel_texture_2d(unsigned char *surface, int width,
|
||||
int height, size_t pitch, float t) {
|
||||
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
float *pixel;
|
||||
|
||||
// in the case where, due to quantization into grids, we have
|
||||
// more threads than pixels, skip the threads which don't
|
||||
// correspond to valid pixels
|
||||
if (x >= width || y >= height) return;
|
||||
|
||||
// get a pointer to the pixel at (x,y)
|
||||
pixel = (float *)(surface + y * pitch) + 4 * x;
|
||||
|
||||
// populate it
|
||||
float value_x = 0.5f + 0.5f * cos(t + 10.0f * ((2.0f * x) / width - 1.0f));
|
||||
float value_y = 0.5f + 0.5f * cos(t + 10.0f * ((2.0f * y) / height - 1.0f));
|
||||
pixel[0] = value_x > 0.5 ? 1 : 0;
|
||||
pixel[1] = value_y > 0.5 ? 1 : 0;
|
||||
pixel[2] = 0.5f + 0.5f * cos(t);
|
||||
pixel[3] = 1; // alpha
|
||||
|
||||
for (int i = 0; i < 6; ++i) {
|
||||
for (int j = 0; j < 4; ++j) {
|
||||
pixel[j] = sqrt(pixel[j]);
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < 6; ++i) {
|
||||
for (int j = 0; j < 4; ++j) {
|
||||
pixel[j] *= pixel[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void cuda_texture_2d(void *surface, int width, int height,
|
||||
size_t pitch, float t) {
|
||||
cudaError_t error = cudaSuccess;
|
||||
|
||||
dim3 Db = dim3(16, 16); // block dimensions are fixed to be 256 threads
|
||||
dim3 Dg = dim3((width + Db.x - 1) / Db.x, (height + Db.y - 1) / Db.y);
|
||||
|
||||
cuda_kernel_texture_2d<<<Dg, Db>>>((unsigned char *)surface, width, height,
|
||||
pitch, t);
|
||||
|
||||
error = cudaGetLastError();
|
||||
|
||||
if (error != cudaSuccess) {
|
||||
printf("cuda_kernel_texture_2d() failed to launch error = %d\n", error);
|
||||
}
|
||||
}
|
@ -1,46 +0,0 @@
|
||||
################################################################################
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met:
|
||||
# * Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
# contributors may be used to endorse or promote products derived
|
||||
# from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
################################################################################
|
||||
#
|
||||
# Makefile project only supported on Mac OS X and Linux Platforms)
|
||||
#
|
||||
################################################################################
|
||||
|
||||
# Target rules
|
||||
all: build
|
||||
|
||||
build:
|
||||
$(info >>> WARNING - VFlockingD3D10 is not supported on Linux - waiving sample <<<)
|
||||
|
||||
run: build
|
||||
|
||||
testrun: build
|
||||
|
||||
clean:
|
||||
|
||||
clobber: clean
|
@ -1,49 +0,0 @@
|
||||
# VFlockingD3D10 - VFlockingD3D10
|
||||
|
||||
## Description
|
||||
|
||||
The sample models formation of V-shaped flocks by big birds, such as geese and cranes. The algorithms of such flocking are borrowed from the paper "V-like formations in flocks of artificial birds" from Artificial Life, Vol. 14, No. 2, 2008. The sample has CPU- and GPU-based implementations. Press 'g' to toggle between them. The GPU-based simulation works many times faster than the CPU-based one. The printout in the console window reports the simulation time per step. Press 'r' to reset the initial distribution of birds.
|
||||
|
||||
## Key Concepts
|
||||
|
||||
Graphics Interop, Data Parallel Algorithms, Physically-Based Simulation, Performance Strategies
|
||||
|
||||
## Supported SM Architectures
|
||||
|
||||
[SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 8.9 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
|
||||
## Supported OSes
|
||||
|
||||
Windows
|
||||
|
||||
## Supported CPU Architecture
|
||||
|
||||
x86_64
|
||||
|
||||
## CUDA APIs involved
|
||||
|
||||
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
|
||||
cudaGraphicsUnmapResources, cudaMemcpy, cudaFree, cudaGetErrorString, cudaGraphicsResourceSetMapFlags, cudaGraphicsResourceGetMappedPointer, cudaGetLastError, cudaGraphicsMapResources, cudaEventSynchronize, cudaGetDeviceProperties, cudaDeviceSynchronize, cudaEventRecord, cudaMemset, cudaGraphicsUnregisterResource, cudaMalloc, cudaEventElapsedTime, cudaGetDeviceCount, cudaEventCreate
|
||||
|
||||
## Dependencies needed to build/run
|
||||
[DirectX](../../../README.md#directx)
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
|
||||
|
||||
## Build and Run
|
||||
|
||||
### Windows
|
||||
The Windows samples are built using the Visual Studio IDE. Solution files (.sln) are provided for each supported version of Visual Studio, using the format:
|
||||
```
|
||||
*_vs<version>.sln - for Visual Studio <version>
|
||||
```
|
||||
Each individual sample has its own set of solution files in its directory:
|
||||
|
||||
To build/examine all the samples at once, the complete solution files should be used. To build/examine a single sample, the individual sample solution files should be used.
|
||||
> **Note:** Some samples require that the Microsoft DirectX SDK (June 2010 or newer) be installed and that the VC++ directory paths are properly set up (**Tools > Options...**). Check DirectX Dependencies section for details."
|
||||
|
||||
## References (for more details)
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,45 +0,0 @@
|
||||
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef VFLOCKING_D3D10_H
|
||||
#define VFLOCKING_D3D10_H
|
||||
|
||||
#pragma once
|
||||
|
||||
// simulation parameters
|
||||
struct Params {
|
||||
float alpha;
|
||||
float upwashX;
|
||||
float upwashY;
|
||||
float wingspan;
|
||||
float dX;
|
||||
float dY;
|
||||
float epsilon;
|
||||
float lambda; // -0.1073f * wingspan ;
|
||||
};
|
||||
|
||||
#endif
|
@ -1,288 +0,0 @@
|
||||
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <helper_cuda.h>
|
||||
#include <helper_math.h>
|
||||
#include <VFlockingD3D10.h>
|
||||
|
||||
#define PI 3.1415926536f
|
||||
|
||||
typedef unsigned int uint;
|
||||
|
||||
__device__ bool isInsideQuad_D(float2 pos0, float2 pos1, float width,
|
||||
float height) {
|
||||
if (fabs(pos0.x - pos1.x) < 0.5f * width &&
|
||||
fabs(pos0.y - pos1.y) < 0.5f * height) {
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
__device__ bool isInsideBird(float2 pixel, float2 pos, float width,
|
||||
float height, float radius) {
|
||||
if (abs(pixel.x - pos.x) < 0.5f * width &&
|
||||
abs(pixel.y - pos.y) < 0.5f * height ||
|
||||
(pixel.x - pos.x) * (pixel.x - pos.x) +
|
||||
(pixel.y - pos.y) * (pixel.y - pos.y) <
|
||||
radius * radius) {
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void cuda_kernel_update(float2 *newPos, float2 *curPos,
|
||||
uint numBirds, bool *hasproxy,
|
||||
bool *neighbors, bool *rightgoals,
|
||||
bool *leftgoals, Params *params) {
|
||||
uint i = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
|
||||
if (i >= numBirds) {
|
||||
return;
|
||||
}
|
||||
|
||||
float minDist = 50000.f;
|
||||
float2 dij = make_float2(0.f);
|
||||
|
||||
if (!hasproxy[i]) {
|
||||
for (uint j = 0; j < numBirds; j++) {
|
||||
if (j == i) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (leftgoals[i * numBirds + j]) {
|
||||
dij = params->dX * normalize(curPos[j] - curPos[i]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
bool collision = false;
|
||||
|
||||
for (uint j = 0; j < numBirds; j++) {
|
||||
float d;
|
||||
|
||||
if (leftgoals[i * numBirds + j]) {
|
||||
d = curPos[j].x - (params->wingspan + params->lambda) - curPos[i].x;
|
||||
|
||||
if (fabs(d) < fabs(minDist)) {
|
||||
minDist = d;
|
||||
}
|
||||
}
|
||||
|
||||
if (rightgoals[i * numBirds + j]) {
|
||||
d = curPos[j].x + (params->wingspan + params->lambda) - curPos[i].x;
|
||||
|
||||
if (fabs(d) < fabs(minDist)) {
|
||||
minDist = d;
|
||||
}
|
||||
}
|
||||
|
||||
if (neighbors[i * numBirds + j] && !collision) {
|
||||
if (curPos[j].y >= curPos[i].y &&
|
||||
curPos[j].y < curPos[i].y + params->epsilon) {
|
||||
dij.y = -params->dY;
|
||||
collision = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (fabs(minDist) <= params->dX) {
|
||||
return;
|
||||
}
|
||||
|
||||
dij.x = minDist > 0 ? params->dX : -params->dX;
|
||||
}
|
||||
|
||||
newPos[i].x = curPos[i].x + dij.x;
|
||||
newPos[i].y = curPos[i].y + dij.y;
|
||||
}
|
||||
|
||||
__global__ void cuda_kernel_checktriples(float2 *pos, uint numBirds,
|
||||
bool *hasproxy, bool *neighbors,
|
||||
bool *rightgoals, bool *leftgoals,
|
||||
uint3 *triples, Params *params) {
|
||||
uint ith = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
|
||||
if (ith >= numBirds * (numBirds - 1) * (numBirds - 2) / 6) {
|
||||
return;
|
||||
}
|
||||
|
||||
uint a[3];
|
||||
a[0] = triples[ith].x;
|
||||
a[1] = triples[ith].y;
|
||||
a[2] = triples[ith].z;
|
||||
|
||||
uint i, j, x;
|
||||
|
||||
for (i = 0; i < 3; i++) {
|
||||
for (j = 2; j > i; j--) {
|
||||
if (pos[a[j - 1]].y > pos[a[j]].y) {
|
||||
x = a[j - 1];
|
||||
a[j - 1] = a[j];
|
||||
a[j] = x;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (hasproxy[a[0]]) {
|
||||
float a2a1 = pos[a[2]].x - pos[a[1]].x;
|
||||
|
||||
if (fabs(a2a1) < 2.f * (params->wingspan + params->lambda))
|
||||
if (a2a1 >= 0) {
|
||||
if (leftgoals[a[0] * numBirds + a[2]]) {
|
||||
leftgoals[a[0] * numBirds + a[2]] = false;
|
||||
}
|
||||
|
||||
if (rightgoals[a[0] * numBirds + a[1]]) {
|
||||
rightgoals[a[0] * numBirds + a[1]] = false;
|
||||
}
|
||||
} else {
|
||||
if (leftgoals[a[0] * numBirds + a[1]]) {
|
||||
leftgoals[a[0] * numBirds + a[1]] = false;
|
||||
}
|
||||
|
||||
if (rightgoals[a[0] * numBirds + a[2]]) {
|
||||
rightgoals[a[0] * numBirds + a[2]] = false;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if ((leftgoals[a[0] * numBirds + a[2]]) &&
|
||||
(leftgoals[a[0] * numBirds + a[1]]))
|
||||
if ((length(pos[a[1]] - pos[a[0]]) < length(pos[a[2]] - pos[a[0]]))) {
|
||||
leftgoals[a[0] * numBirds + a[2]] = false;
|
||||
} else {
|
||||
leftgoals[a[0] * numBirds + a[1]] = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void cuda_kernel_checkpairs(float2 *pos, uint numBirds,
|
||||
bool *hasproxy, bool *neighbors,
|
||||
bool *rightgoals, bool *leftgoals,
|
||||
uint2 *pairs, Params *params) {
|
||||
uint i = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
|
||||
if (i >= numBirds * (numBirds - 1) / 2) {
|
||||
return;
|
||||
}
|
||||
|
||||
uint front, back;
|
||||
|
||||
if (pos[pairs[i].y].y > pos[pairs[i].x].y) {
|
||||
front = pairs[i].y;
|
||||
back = pairs[i].x;
|
||||
} else {
|
||||
front = pairs[i].x;
|
||||
back = pairs[i].y;
|
||||
}
|
||||
|
||||
leftgoals[back * numBirds + front] = true;
|
||||
rightgoals[back * numBirds + front] = true;
|
||||
|
||||
float2 stepback;
|
||||
stepback.x = pos[front].x;
|
||||
stepback.y = pos[front].y - 0.5f * params->upwashY;
|
||||
|
||||
if (isInsideQuad_D(
|
||||
pos[back], stepback,
|
||||
2.f * (params->wingspan + params->lambda + params->upwashX),
|
||||
params->upwashY)) {
|
||||
neighbors[back * numBirds + front] = true;
|
||||
|
||||
if (!hasproxy[back]) {
|
||||
hasproxy[back] = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void cuda_simulate(float2 *newPos, float2 *curPos, uint numBirds,
|
||||
bool *d_hasproxy, bool *d_neighbors,
|
||||
bool *d_leftgoals, bool *d_rightgoals,
|
||||
uint2 *d_pairs, uint3 *d_triples,
|
||||
Params *d_params) {
|
||||
cudaError_t error = cudaSuccess;
|
||||
float tempms;
|
||||
static float ms = 0.f;
|
||||
static uint step = 0;
|
||||
int smallblockSize = 32, midblockSize = 128, bigblockSize = 32;
|
||||
|
||||
cudaEvent_t e_start, e_stop;
|
||||
cudaEventCreate(&e_start);
|
||||
cudaEventCreate(&e_stop);
|
||||
cudaEventRecord(e_start, 0);
|
||||
|
||||
cudaMemset(d_leftgoals, 0, numBirds * numBirds * sizeof(bool));
|
||||
cudaMemset(d_rightgoals, 0, numBirds * numBirds * sizeof(bool));
|
||||
cudaMemset(d_hasproxy, 0, numBirds * sizeof(bool));
|
||||
cudaMemset(d_neighbors, 0, numBirds * numBirds * sizeof(bool));
|
||||
|
||||
dim3 Db = dim3(bigblockSize);
|
||||
dim3 Dg =
|
||||
dim3((numBirds * (numBirds - 1) / 2 + bigblockSize - 1) / bigblockSize);
|
||||
cuda_kernel_checkpairs<<<Dg, Db>>>(curPos, numBirds, d_hasproxy, d_neighbors,
|
||||
d_rightgoals, d_leftgoals, d_pairs,
|
||||
d_params);
|
||||
|
||||
Db = dim3(midblockSize);
|
||||
Dg =
|
||||
dim3((numBirds * (numBirds - 1) * (numBirds - 2) / 6 + bigblockSize - 1) /
|
||||
bigblockSize);
|
||||
cuda_kernel_checktriples<<<Dg, Db>>>(curPos, numBirds, d_hasproxy,
|
||||
d_neighbors, d_rightgoals, d_leftgoals,
|
||||
d_triples, d_params);
|
||||
|
||||
Db = dim3(smallblockSize);
|
||||
Dg = dim3((numBirds + smallblockSize - 1) / smallblockSize);
|
||||
cuda_kernel_update<<<Dg, Db>>>(newPos, curPos, numBirds, d_hasproxy,
|
||||
d_neighbors, d_rightgoals, d_leftgoals,
|
||||
d_params /*, d_pWingTips */);
|
||||
|
||||
cudaDeviceSynchronize();
|
||||
|
||||
cudaEventRecord(e_stop, 0);
|
||||
cudaEventSynchronize(e_stop);
|
||||
cudaEventElapsedTime(&tempms, e_start, e_stop);
|
||||
ms += tempms;
|
||||
|
||||
if (!(step % 100) && step) {
|
||||
printf("GPU, step %d \ntime per step %6.3f ms \n", step, ms / 100.f);
|
||||
ms = 0.f;
|
||||
}
|
||||
|
||||
step++;
|
||||
|
||||
error = cudaGetLastError();
|
||||
|
||||
if (error != cudaSuccess) {
|
||||
printf("one of the cuda kernels failed to launch, error = %d\n", error);
|
||||
}
|
||||
}
|
File diff suppressed because one or more lines are too long
@ -1,46 +0,0 @@
|
||||
################################################################################
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met:
|
||||
# * Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
# contributors may be used to endorse or promote products derived
|
||||
# from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
################################################################################
|
||||
#
|
||||
# Makefile project only supported on Mac OS X and Linux Platforms)
|
||||
#
|
||||
################################################################################
|
||||
|
||||
# Target rules
|
||||
all: build
|
||||
|
||||
build:
|
||||
$(info >>> WARNING - simpleD3D10 is not supported on Linux - waiving sample <<<)
|
||||
|
||||
run: build
|
||||
|
||||
testrun: build
|
||||
|
||||
clean:
|
||||
|
||||
clobber: clean
|
@ -1,49 +0,0 @@
|
||||
# simpleD3D10 - Simple Direct3D10 (Vertex Array)
|
||||
|
||||
## Description
|
||||
|
||||
Simple program which demonstrates interoperability between CUDA and Direct3D10. The program generates a vertex array with CUDA and uses Direct3D10 to render the geometry. A Direct3D Capable device is required.
|
||||
|
||||
## Key Concepts
|
||||
|
||||
Graphics Interop, 3D Graphics
|
||||
|
||||
## Supported SM Architectures
|
||||
|
||||
[SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 8.9 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
|
||||
## Supported OSes
|
||||
|
||||
Windows
|
||||
|
||||
## Supported CPU Architecture
|
||||
|
||||
x86_64
|
||||
|
||||
## CUDA APIs involved
|
||||
|
||||
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
|
||||
cudaGraphicsUnmapResources, cudaGetErrorString, cudaGraphicsResourceGetMappedPointer, cudaGetLastError, cudaGraphicsMapResources, cudaGetDeviceCount, cudaGraphicsUnregisterResource, cudaGetDeviceProperties
|
||||
|
||||
## Dependencies needed to build/run
|
||||
[DirectX](../../../README.md#directx)
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
|
||||
|
||||
## Build and Run
|
||||
|
||||
### Windows
|
||||
The Windows samples are built using the Visual Studio IDE. Solution files (.sln) are provided for each supported version of Visual Studio, using the format:
|
||||
```
|
||||
*_vs<version>.sln - for Visual Studio <version>
|
||||
```
|
||||
Each individual sample has its own set of solution files in its directory:
|
||||
|
||||
To build/examine all the samples at once, the complete solution files should be used. To build/examine a single sample, the individual sample solution files should be used.
|
||||
> **Note:** Some samples require that the Microsoft DirectX SDK (June 2010 or newer) be installed and that the VC++ directory paths are properly set up (**Tools > Options...**). Check DirectX Dependencies section for details."
|
||||
|
||||
## References (for more details)
|
||||
|
Binary file not shown.
@ -1,650 +0,0 @@
|
||||
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* This example demonstrates how to use the CUDA Direct3D bindings to fill
|
||||
* a vertex buffer with CUDA and use Direct3D to render the data.
|
||||
* Host code.
|
||||
*/
|
||||
|
||||
#pragma warning(disable : 4312)
|
||||
|
||||
// Windows System include files
|
||||
#include <windows.h>
|
||||
#include <mmsystem.h>
|
||||
|
||||
// This header includes all the necessary D3D10 and CUDA includes
|
||||
#include <dynlink_d3d10.h>
|
||||
#include <cuda_runtime_api.h>
|
||||
#include <cuda_d3d10_interop.h>
|
||||
|
||||
// includes, project
|
||||
#include <rendercheck_d3d10.h>
|
||||
#include <helper_functions.h> // Helper functions for other non-cuda utilities
|
||||
#include <helper_cuda.h> // CUDA Helper Functions for initialization
|
||||
|
||||
#define MAX_EPSILON 10
|
||||
|
||||
static char *sSDKSample = "simpleD3D10";
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Global variables
|
||||
//-----------------------------------------------------------------------------
|
||||
IDXGIAdapter *g_pCudaCapableAdapter = NULL; // Adapter to use
|
||||
ID3D10Device *g_pd3dDevice = NULL; // Our rendering device
|
||||
IDXGISwapChain *g_pSwapChain = NULL; // The swap chain of the window
|
||||
ID3D10RenderTargetView *g_pSwapChainRTV =
|
||||
NULL; // The Render target view on the swap chain ( used for clear)
|
||||
|
||||
ID3D10InputLayout *g_pInputLayout = NULL;
|
||||
ID3D10Effect *g_pSimpleEffect = NULL;
|
||||
ID3D10EffectTechnique *g_pSimpleTechnique = NULL;
|
||||
ID3D10EffectMatrixVariable *g_pmWorld = NULL;
|
||||
ID3D10EffectMatrixVariable *g_pmView = NULL;
|
||||
ID3D10EffectMatrixVariable *g_pmProjection = NULL;
|
||||
|
||||
static const char g_simpleEffectSrc[] =
|
||||
"matrix g_mWorld;\n"
|
||||
"matrix g_mView;\n"
|
||||
"matrix g_mProjection;\n"
|
||||
"\n"
|
||||
"struct Fragment{ \n"
|
||||
" float4 Pos : SV_POSITION;\n"
|
||||
" float4 Col : TEXCOORD0; };\n"
|
||||
"\n"
|
||||
"Fragment VS( float4 Pos : POSITION, float4 Col : COLOR )\n"
|
||||
"{\n"
|
||||
" Fragment f;\n"
|
||||
" f.Pos = mul(Pos, g_mWorld);\n"
|
||||
" f.Pos = mul(f.Pos, g_mView);\n"
|
||||
" f.Pos = mul(f.Pos, g_mProjection);\n"
|
||||
" f.Col = Col;\n"
|
||||
" return f;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"float4 PS( Fragment f ) : SV_Target\n"
|
||||
"{\n"
|
||||
" return f.Col;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"technique10 Render\n"
|
||||
"{\n"
|
||||
" pass P0\n"
|
||||
" {\n"
|
||||
" SetVertexShader( CompileShader( vs_4_0, VS() ) );\n"
|
||||
" SetGeometryShader( NULL );\n"
|
||||
" SetPixelShader( CompileShader( ps_4_0, PS() ) );\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"\n";
|
||||
|
||||
ID3D10Buffer *g_pVB = NULL; // Buffer to hold vertices
|
||||
|
||||
struct cudaGraphicsResource *cuda_VB_resource; // handles D3D10-CUDA exchange
|
||||
|
||||
// testing/tracing function used pervasively in tests. if the condition is
|
||||
// unsatisfied then spew and fail the function immediately (doing no cleanup)
|
||||
#define AssertOrQuit(x) \
|
||||
if (!(x)) { \
|
||||
fprintf(stdout, "Assert unsatisfied in %s at %s:%d\n", __FUNCTION__, \
|
||||
__FILE__, __LINE__); \
|
||||
return 1; \
|
||||
}
|
||||
|
||||
// A structure for our custom vertex type
|
||||
struct CUSTOMVERTEX {
|
||||
FLOAT x, y, z; // The untransformed, 3D position for the vertex
|
||||
DWORD color; // The vertex color
|
||||
};
|
||||
|
||||
const unsigned int g_WindowWidth = 1024;
|
||||
const unsigned int g_WindowHeight = 1024;
|
||||
|
||||
const unsigned int g_MeshWidth = 512;
|
||||
const unsigned int g_MeshHeight = 512;
|
||||
|
||||
const unsigned int g_NumVertices = g_MeshWidth * g_MeshHeight;
|
||||
|
||||
bool g_bPassed = true;
|
||||
int g_iFrameToCompare = 10;
|
||||
|
||||
int *pArgc = NULL;
|
||||
char **pArgv = NULL;
|
||||
|
||||
float anim;
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Forward declarations
|
||||
//-----------------------------------------------------------------------------
|
||||
void runTest(int argc, char **argv, char *ref_file);
|
||||
void runCuda();
|
||||
bool SaveResult(int argc, char **argv);
|
||||
HRESULT InitD3D(HWND hWnd);
|
||||
HRESULT InitGeometry();
|
||||
VOID Cleanup();
|
||||
VOID SetupMatrices();
|
||||
VOID Render();
|
||||
LRESULT WINAPI MsgProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam);
|
||||
|
||||
// CUDA/D3D10 kernel
|
||||
extern "C" void simpleD3DKernel(float4 *pos, unsigned int width,
|
||||
unsigned int height, float time);
|
||||
|
||||
#define NAME_LEN 512
|
||||
|
||||
bool findCUDADevice() {
|
||||
int nGraphicsGPU = 0;
|
||||
int deviceCount = 0;
|
||||
bool bFoundGraphics = false;
|
||||
char devname[NAME_LEN];
|
||||
|
||||
// This function call returns 0 if there are no CUDA capable devices.
|
||||
cudaError_t error_id = cudaGetDeviceCount(&deviceCount);
|
||||
|
||||
if (error_id != cudaSuccess) {
|
||||
printf("cudaGetDeviceCount returned %d\n-> %s\n", (int)error_id,
|
||||
cudaGetErrorString(error_id));
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
if (deviceCount == 0) {
|
||||
printf("> There are no device(s) supporting CUDA\n");
|
||||
return false;
|
||||
} else {
|
||||
printf("> Found %d CUDA Capable Device(s)\n", deviceCount);
|
||||
}
|
||||
|
||||
// Get CUDA device properties
|
||||
cudaDeviceProp deviceProp;
|
||||
|
||||
for (int dev = 0; dev < deviceCount; ++dev) {
|
||||
cudaGetDeviceProperties(&deviceProp, dev);
|
||||
STRCPY(devname, NAME_LEN, deviceProp.name);
|
||||
printf("> GPU %d: %s\n", dev, devname);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool findDXDevice(char *dev_name) {
|
||||
HRESULT hr = S_OK;
|
||||
cudaError cuStatus;
|
||||
|
||||
// Iterate through the candidate adapters
|
||||
IDXGIFactory *pFactory;
|
||||
hr = sFnPtr_CreateDXGIFactory(__uuidof(IDXGIFactory), (void **)(&pFactory));
|
||||
|
||||
if (!SUCCEEDED(hr)) {
|
||||
printf("> No DXGI Factory created.\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
UINT adapter = 0;
|
||||
|
||||
for (; !g_pCudaCapableAdapter; ++adapter) {
|
||||
// Get a candidate DXGI adapter
|
||||
IDXGIAdapter *pAdapter = NULL;
|
||||
hr = pFactory->EnumAdapters(adapter, &pAdapter);
|
||||
|
||||
if (FAILED(hr)) {
|
||||
break; // no compatible adapters found
|
||||
}
|
||||
|
||||
// Query to see if there exists a corresponding compute device
|
||||
int cuDevice;
|
||||
cuStatus = cudaD3D10GetDevice(&cuDevice, pAdapter);
|
||||
// This prints and resets the cudaError to cudaSuccess
|
||||
printLastCudaError("cudaD3D10GetDevice failed");
|
||||
|
||||
if (cudaSuccess == cuStatus) {
|
||||
// If so, mark it as the one against which to create our d3d10 device
|
||||
g_pCudaCapableAdapter = pAdapter;
|
||||
g_pCudaCapableAdapter->AddRef();
|
||||
}
|
||||
|
||||
pAdapter->Release();
|
||||
}
|
||||
|
||||
printf("> Found %d D3D10 Adapater(s).\n", (int)adapter);
|
||||
|
||||
pFactory->Release();
|
||||
|
||||
if (!g_pCudaCapableAdapter) {
|
||||
printf("> Found 0 D3D10 Adapater(s) /w Compute capability.\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
DXGI_ADAPTER_DESC adapterDesc;
|
||||
g_pCudaCapableAdapter->GetDesc(&adapterDesc);
|
||||
wcstombs(dev_name, adapterDesc.Description, 128);
|
||||
|
||||
printf("> Found 1 D3D10 Adapater(s) /w Compute capability.\n");
|
||||
printf("> %s\n", dev_name);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Program main
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
int main(int argc, char **argv) {
|
||||
char device_name[256];
|
||||
char *ref_file = NULL;
|
||||
|
||||
pArgc = &argc;
|
||||
pArgv = argv;
|
||||
|
||||
printf("> %s starting...\n", sSDKSample);
|
||||
|
||||
if (!findCUDADevice()) { // Search for CUDA GPU
|
||||
printf("> CUDA Device NOT found on \"%s\".. Exiting.\n", device_name);
|
||||
exit(EXIT_SUCCESS);
|
||||
}
|
||||
|
||||
// Search for D3D API (locate drivers, does not mean device is found)
|
||||
if (!dynlinkLoadD3D10API()) {
|
||||
printf("> D3D10 API libraries NOT found on.. Exiting.\n");
|
||||
dynlinkUnloadD3D10API();
|
||||
exit(EXIT_SUCCESS);
|
||||
}
|
||||
|
||||
if (!findDXDevice(device_name)) { // Search for D3D Hardware Device
|
||||
printf("> D3D10 Graphics Device NOT found.. Exiting.\n");
|
||||
dynlinkUnloadD3D10API();
|
||||
exit(EXIT_SUCCESS);
|
||||
}
|
||||
|
||||
if (argc > 1) {
|
||||
if (checkCmdLineFlag(argc, (const char **)argv, "file")) {
|
||||
getCmdLineArgumentString(argc, (const char **)argv, "file",
|
||||
(char **)&ref_file);
|
||||
}
|
||||
}
|
||||
|
||||
runTest(argc, argv, ref_file);
|
||||
|
||||
//
|
||||
// and exit
|
||||
//
|
||||
printf("%s running on %s exiting...\n", sSDKSample, device_name);
|
||||
printf("%s sample finished returned: %s\n", sSDKSample,
|
||||
(g_bPassed ? "OK" : "ERROR!"));
|
||||
exit(g_bPassed ? EXIT_SUCCESS : EXIT_FAILURE);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Run a simple test for CUDA
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
void runTest(int argc, char **argv, char *ref_file) {
|
||||
// Register the window class
|
||||
WNDCLASSEX wc = {sizeof(WNDCLASSEX), CS_CLASSDC, MsgProc, 0L, 0L,
|
||||
GetModuleHandle(NULL), NULL, NULL, NULL, NULL,
|
||||
"CUDA/D3D10 simpleD3D10", NULL};
|
||||
RegisterClassEx(&wc);
|
||||
|
||||
// Create the application's window
|
||||
int xBorder = ::GetSystemMetrics(SM_CXSIZEFRAME);
|
||||
int yMenu = ::GetSystemMetrics(SM_CYMENU);
|
||||
int yBorder = ::GetSystemMetrics(SM_CYSIZEFRAME);
|
||||
HWND hWnd = CreateWindow(
|
||||
wc.lpszClassName, "CUDA/D3D10 simpleD3D10", WS_OVERLAPPEDWINDOW, 0, 0,
|
||||
g_WindowWidth + 2 * xBorder, g_WindowHeight + 2 * yBorder + yMenu, NULL,
|
||||
NULL, wc.hInstance, NULL);
|
||||
|
||||
// Initialize Direct3D
|
||||
if (SUCCEEDED(InitD3D(hWnd))) {
|
||||
// Create the scene geometry
|
||||
if (SUCCEEDED(InitGeometry())) {
|
||||
// Initialize interoperability between CUDA and Direct3D
|
||||
// Register vertex buffer with CUDA
|
||||
// DEPRECATED: cudaD3D10RegisterResource(g_pVB,
|
||||
// cudaD3D10RegisterFlagsNone);
|
||||
cudaGraphicsD3D10RegisterResource(&cuda_VB_resource, g_pVB,
|
||||
cudaD3D10RegisterFlagsNone);
|
||||
getLastCudaError("cudaGraphicsD3D10RegisterResource (g_pVB) failed");
|
||||
|
||||
// Initialize vertex buffer with CUDA
|
||||
runCuda();
|
||||
|
||||
// Save result
|
||||
SaveResult(argc, argv);
|
||||
|
||||
// Show the window
|
||||
ShowWindow(hWnd, SW_SHOWDEFAULT);
|
||||
UpdateWindow(hWnd);
|
||||
|
||||
// Enter the message loop
|
||||
MSG msg;
|
||||
ZeroMemory(&msg, sizeof(msg));
|
||||
|
||||
while (msg.message != WM_QUIT) {
|
||||
if (PeekMessage(&msg, NULL, 0U, 0U, PM_REMOVE)) {
|
||||
TranslateMessage(&msg);
|
||||
DispatchMessage(&msg);
|
||||
} else {
|
||||
Render();
|
||||
|
||||
if (ref_file != NULL) {
|
||||
for (int count = 0; count < g_iFrameToCompare; count++) {
|
||||
Render();
|
||||
}
|
||||
|
||||
const char *cur_image_path = "simpleD3D10.ppm";
|
||||
|
||||
// Save a reference of our current test run image
|
||||
CheckRenderD3D10::ActiveRenderTargetToPPM(g_pd3dDevice,
|
||||
cur_image_path);
|
||||
|
||||
// compare to offical reference image, printing PASS or FAIL.
|
||||
g_bPassed = CheckRenderD3D10::PPMvsPPM(cur_image_path, ref_file,
|
||||
argv[0], MAX_EPSILON, 0.15f);
|
||||
|
||||
Cleanup();
|
||||
|
||||
PostQuitMessage(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Release D3D Library (after message loop)
|
||||
dynlinkUnloadD3D10API();
|
||||
|
||||
UnregisterClass(wc.lpszClassName, wc.hInstance);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Run the Cuda part of the computation
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
void runCuda() {
|
||||
// Map vertex buffer to Cuda
|
||||
float4 *d_ptr;
|
||||
|
||||
// CUDA Map call to the Vertex Buffer and return a pointer
|
||||
// DEPRECATED: cudaD3D10MapResources(1, (ID3D10Resource **)&g_pVB);
|
||||
checkCudaErrors(cudaGraphicsMapResources(1, &cuda_VB_resource, 0));
|
||||
getLastCudaError("cudaGraphicsMapResources failed");
|
||||
|
||||
// DEPRECATED: cudaD3D10ResourceGetMappedPointer( (void **)&dptr, g_pVB, 0);
|
||||
size_t num_bytes;
|
||||
checkCudaErrors(cudaGraphicsResourceGetMappedPointer(
|
||||
(void **)&d_ptr, &num_bytes, cuda_VB_resource));
|
||||
getLastCudaError("cudaGraphicsResourceGetMappedPointer failed");
|
||||
|
||||
// Execute kernel
|
||||
simpleD3DKernel(d_ptr, g_MeshWidth, g_MeshHeight, anim);
|
||||
|
||||
// CUDA Map Unmap vertex buffer
|
||||
// DEPRECATED: cudaD3D10UnmapResources(1, (ID3D10Resource **)&g_pVB);
|
||||
checkCudaErrors(cudaGraphicsUnmapResources(1, &cuda_VB_resource, 0));
|
||||
getLastCudaError("cudaGraphicsUnmapResource failed");
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Check if the result is correct or write data to file for external
|
||||
//! regression testing
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
bool SaveResult(int argc, char **argv) {
|
||||
// Map vertex buffer
|
||||
float *data;
|
||||
|
||||
if (FAILED(g_pVB->Map(D3D10_MAP_READ, 0,
|
||||
(void **)&data))) // Lock(0, 0, (void**)&data, 0)))
|
||||
return false;
|
||||
|
||||
// Unmap
|
||||
g_pVB->Unmap();
|
||||
|
||||
// Save result
|
||||
if (checkCmdLineFlag(argc, (const char **)argv, "regression")) {
|
||||
// write file for regression test
|
||||
sdkWriteFile<float>("./data/regression.dat", data, sizeof(CUSTOMVERTEX),
|
||||
0.0f, false);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Name: InitD3D()
|
||||
// Desc: Initializes Direct3D
|
||||
//-----------------------------------------------------------------------------
|
||||
HRESULT InitD3D(HWND hWnd) {
|
||||
// Set up the structure used to create the device and swapchain
|
||||
DXGI_SWAP_CHAIN_DESC sd;
|
||||
ZeroMemory(&sd, sizeof(sd));
|
||||
sd.BufferCount = 1;
|
||||
sd.BufferDesc.Width = g_WindowWidth;
|
||||
sd.BufferDesc.Height = g_WindowHeight;
|
||||
sd.BufferDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
|
||||
sd.BufferDesc.RefreshRate.Numerator = 60;
|
||||
sd.BufferDesc.RefreshRate.Denominator = 1;
|
||||
sd.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT;
|
||||
sd.OutputWindow = hWnd;
|
||||
sd.SampleDesc.Count = 1;
|
||||
sd.SampleDesc.Quality = 0;
|
||||
sd.Windowed = TRUE;
|
||||
|
||||
// Create device and swapchain
|
||||
HRESULT hr = sFnPtr_D3D10CreateDeviceAndSwapChain(
|
||||
g_pCudaCapableAdapter, D3D10_DRIVER_TYPE_HARDWARE, NULL, 0,
|
||||
D3D10_SDK_VERSION, &sd, &g_pSwapChain, &g_pd3dDevice);
|
||||
AssertOrQuit(SUCCEEDED(hr));
|
||||
g_pCudaCapableAdapter->Release();
|
||||
|
||||
// Create a render target view of the swapchain
|
||||
ID3D10Texture2D *pBuffer;
|
||||
hr =
|
||||
g_pSwapChain->GetBuffer(0, __uuidof(ID3D10Texture2D), (LPVOID *)&pBuffer);
|
||||
AssertOrQuit(SUCCEEDED(hr));
|
||||
|
||||
hr = g_pd3dDevice->CreateRenderTargetView(pBuffer, NULL, &g_pSwapChainRTV);
|
||||
AssertOrQuit(SUCCEEDED(hr));
|
||||
pBuffer->Release();
|
||||
|
||||
g_pd3dDevice->OMSetRenderTargets(1, &g_pSwapChainRTV, NULL);
|
||||
|
||||
// Setup the viewport
|
||||
D3D10_VIEWPORT vp;
|
||||
vp.Width = g_WindowWidth;
|
||||
vp.Height = g_WindowHeight;
|
||||
vp.MinDepth = 0.0f;
|
||||
vp.MaxDepth = 1.0f;
|
||||
vp.TopLeftX = 0;
|
||||
vp.TopLeftY = 0;
|
||||
g_pd3dDevice->RSSetViewports(1, &vp);
|
||||
|
||||
// Setup the effect
|
||||
{
|
||||
ID3D10Blob *pCompiledEffect;
|
||||
hr = sFnPtr_D3D10CompileEffectFromMemory((void *)g_simpleEffectSrc,
|
||||
sizeof(g_simpleEffectSrc), NULL,
|
||||
NULL, // pDefines
|
||||
NULL, // pIncludes
|
||||
0, // HLSL flags
|
||||
0, // FXFlags
|
||||
&pCompiledEffect, NULL);
|
||||
AssertOrQuit(SUCCEEDED(hr));
|
||||
|
||||
hr = sFnPtr_D3D10CreateEffectFromMemory(
|
||||
pCompiledEffect->GetBufferPointer(), pCompiledEffect->GetBufferSize(),
|
||||
0, // FXFlags
|
||||
g_pd3dDevice, NULL, &g_pSimpleEffect);
|
||||
pCompiledEffect->Release();
|
||||
|
||||
g_pSimpleTechnique = g_pSimpleEffect->GetTechniqueByName("Render");
|
||||
|
||||
// g_pmWorldViewProjection =
|
||||
// g_pSimpleEffect->GetVariableByName("g_mWorldViewProjection")->AsMatrix();
|
||||
g_pmWorld = g_pSimpleEffect->GetVariableByName("g_mWorld")->AsMatrix();
|
||||
g_pmView = g_pSimpleEffect->GetVariableByName("g_mView")->AsMatrix();
|
||||
g_pmProjection =
|
||||
g_pSimpleEffect->GetVariableByName("g_mProjection")->AsMatrix();
|
||||
|
||||
// Define the input layout
|
||||
D3D10_INPUT_ELEMENT_DESC layout[] = {
|
||||
{"POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 0,
|
||||
D3D10_INPUT_PER_VERTEX_DATA, 0},
|
||||
{"COLOR", 0, DXGI_FORMAT_R8G8B8A8_UNORM, 0, 12,
|
||||
D3D10_INPUT_PER_VERTEX_DATA, 0},
|
||||
};
|
||||
UINT numElements = sizeof(layout) / sizeof(layout[0]);
|
||||
|
||||
// Create the input layout
|
||||
D3D10_PASS_DESC PassDesc;
|
||||
g_pSimpleTechnique->GetPassByIndex(0)->GetDesc(&PassDesc);
|
||||
hr = g_pd3dDevice->CreateInputLayout(
|
||||
layout, numElements, PassDesc.pIAInputSignature,
|
||||
PassDesc.IAInputSignatureSize, &g_pInputLayout);
|
||||
AssertOrQuit(SUCCEEDED(hr));
|
||||
|
||||
// Setup Input Layout, apply effect and draw points
|
||||
g_pd3dDevice->IASetInputLayout(g_pInputLayout);
|
||||
g_pSimpleTechnique->GetPassByIndex(0)->Apply(0);
|
||||
g_pd3dDevice->IASetPrimitiveTopology(D3D10_PRIMITIVE_TOPOLOGY_POINTLIST);
|
||||
}
|
||||
|
||||
return S_OK;
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Name: InitGeometry()
|
||||
// Desc: Creates the scene geometry
|
||||
//-----------------------------------------------------------------------------
|
||||
HRESULT InitGeometry() {
|
||||
// Setup buffer desc
|
||||
D3D10_BUFFER_DESC bufferDesc;
|
||||
bufferDesc.Usage = D3D10_USAGE_DEFAULT;
|
||||
bufferDesc.ByteWidth = sizeof(CUSTOMVERTEX) * g_NumVertices;
|
||||
bufferDesc.BindFlags = D3D10_BIND_VERTEX_BUFFER;
|
||||
bufferDesc.CPUAccessFlags = 0;
|
||||
bufferDesc.MiscFlags = 0;
|
||||
|
||||
// Create the buffer, no need for sub resource data struct since everything
|
||||
// will be defined from cuda
|
||||
if (FAILED(g_pd3dDevice->CreateBuffer(&bufferDesc, NULL, &g_pVB)))
|
||||
return E_FAIL;
|
||||
|
||||
return S_OK;
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Name: Cleanup()
|
||||
// Desc: Releases all previously initialized objects
|
||||
//-----------------------------------------------------------------------------
|
||||
VOID Cleanup() {
|
||||
if (g_pVB != NULL) {
|
||||
// Unregister vertex buffer
|
||||
// DEPRECATED: checkCudaErrors(cudaD3D10UnregisterResource(g_pVB));
|
||||
cudaGraphicsUnregisterResource(cuda_VB_resource);
|
||||
getLastCudaError("cudaGraphicsUnregisterResource failed");
|
||||
|
||||
g_pVB->Release();
|
||||
}
|
||||
|
||||
if (g_pInputLayout != NULL) g_pInputLayout->Release();
|
||||
|
||||
if (g_pSimpleEffect != NULL) g_pSimpleEffect->Release();
|
||||
|
||||
if (g_pSwapChainRTV != NULL) g_pSwapChainRTV->Release();
|
||||
|
||||
if (g_pSwapChain != NULL) g_pSwapChain->Release();
|
||||
|
||||
if (g_pd3dDevice != NULL) g_pd3dDevice->Release();
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Name: SetupMatrices()
|
||||
// Desc: Sets up the world, view, and projection transform matrices.
|
||||
//-----------------------------------------------------------------------------
|
||||
VOID SetupMatrices() {
|
||||
XMMATRIX matWorld;
|
||||
matWorld = XMMatrixIdentity();
|
||||
|
||||
XMVECTOR vEyePt = {0.0f, 3.0f, -2.0f};
|
||||
XMVECTOR vLookatPt = {0.0f, 0.0f, 0.0f};
|
||||
XMVECTOR vUpVec = {0.0f, 1.0f, 0.0f};
|
||||
XMMATRIX matView;
|
||||
matView = XMMatrixLookAtLH(vEyePt, vLookatPt, vUpVec);
|
||||
|
||||
XMMATRIX matProj;
|
||||
matProj = XMMatrixPerspectiveFovLH((float)XM_PI / 4.f, 1.0f, 0.01f, 10.0f);
|
||||
|
||||
g_pmWorld->SetMatrix((float *)&matWorld);
|
||||
g_pmView->SetMatrix((float *)&matView);
|
||||
g_pmProjection->SetMatrix((float *)&matProj);
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Name: Render()
|
||||
// Desc: Draws the scene
|
||||
//-----------------------------------------------------------------------------
|
||||
VOID Render() {
|
||||
// Clear the backbuffer to a black color
|
||||
float ClearColor[4] = {0, 0, 0, 0};
|
||||
g_pd3dDevice->ClearRenderTargetView(g_pSwapChainRTV, ClearColor);
|
||||
|
||||
// Run CUDA to update vertex positions
|
||||
runCuda();
|
||||
|
||||
// Draw frame
|
||||
{
|
||||
// Setup the world, view, and projection matrices
|
||||
SetupMatrices();
|
||||
|
||||
// Render the vertex buffer contents
|
||||
UINT stride = sizeof(CUSTOMVERTEX);
|
||||
UINT offset = 0;
|
||||
g_pd3dDevice->IASetVertexBuffers(0, 1, &g_pVB, &stride, &offset);
|
||||
|
||||
g_pSimpleTechnique->GetPassByIndex(0)->Apply(0);
|
||||
g_pd3dDevice->Draw(g_NumVertices, 0);
|
||||
}
|
||||
|
||||
// Present the backbuffer contents to the display
|
||||
g_pSwapChain->Present(0, 0);
|
||||
|
||||
anim += 0.01f;
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Name: MsgProc()
|
||||
// Desc: The window's message handler
|
||||
//-----------------------------------------------------------------------------
|
||||
LRESULT WINAPI MsgProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam) {
|
||||
switch (msg) {
|
||||
case WM_DESTROY:
|
||||
case WM_KEYDOWN:
|
||||
if (msg != WM_KEYDOWN || wParam == 27) {
|
||||
Cleanup();
|
||||
PostQuitMessage(0);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
return DefWindowProc(hWnd, msg, wParam, lParam);
|
||||
}
|
@ -1,85 +0,0 @@
|
||||
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* This example demonstrates how to use the CUDA Direct3D bindings with the
|
||||
* runtime API.
|
||||
* Device code.
|
||||
*/
|
||||
|
||||
#ifndef _SIMPLED3D_KERNEL_CU_
|
||||
#define _SIMPLED3D_KERNEL_CU_
|
||||
|
||||
// includes, C string library
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
// includes, cuda
|
||||
#include <cuda.h>
|
||||
#include <builtin_types.h>
|
||||
#include <cuda_runtime_api.h>
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
//! Simple kernel to modify vertex positions in sine wave pattern
|
||||
//! @param pos pos in global memory
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
__global__ void kernel(float4 *pos, unsigned int width, unsigned int height,
|
||||
float time) {
|
||||
unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
|
||||
// calculate uv coordinates
|
||||
float u = x / (float)width;
|
||||
float v = y / (float)height;
|
||||
u = u * 2.0f - 1.0f;
|
||||
v = v * 2.0f - 1.0f;
|
||||
|
||||
// calculate simple sine wave pattern
|
||||
float freq = 4.0f;
|
||||
float w = sinf(u * freq + time) * cosf(v * freq + time) * 0.5f;
|
||||
|
||||
// write output vertex
|
||||
pos[y * width + x] = make_float4(u, w, v, __int_as_float(0xff00ff00));
|
||||
}
|
||||
|
||||
extern "C" void simpleD3DKernel(float4 *pos, unsigned int width,
|
||||
unsigned int height, float time) {
|
||||
cudaError_t error = cudaSuccess;
|
||||
|
||||
dim3 block(8, 8, 1);
|
||||
dim3 grid(width / block.x, height / block.y, 1);
|
||||
|
||||
kernel<<<grid, block>>>(pos, width, height, time);
|
||||
|
||||
error = cudaGetLastError();
|
||||
|
||||
if (error != cudaSuccess) {
|
||||
printf("kernel() failed to launch error = %d\n", error);
|
||||
}
|
||||
}
|
||||
|
||||
#endif // #ifndef _SIMPLED3D_KERNEL_CU_
|
@ -1,46 +0,0 @@
|
||||
################################################################################
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met:
|
||||
# * Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
# contributors may be used to endorse or promote products derived
|
||||
# from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
################################################################################
|
||||
#
|
||||
# Makefile project only supported on Mac OS X and Linux Platforms)
|
||||
#
|
||||
################################################################################
|
||||
|
||||
# Target rules
|
||||
all: build
|
||||
|
||||
build:
|
||||
$(info >>> WARNING - simpleD3D10RenderTarget is not supported on Linux - waiving sample <<<)
|
||||
|
||||
run: build
|
||||
|
||||
testrun: build
|
||||
|
||||
clean:
|
||||
|
||||
clobber: clean
|
@ -1,49 +0,0 @@
|
||||
# simpleD3D10RenderTarget - Simple Direct3D10 Render Target
|
||||
|
||||
## Description
|
||||
|
||||
Simple program which demonstrates interop of rendertargets between Direct3D10 and CUDA. The program uses RenderTarget positions with CUDA and generates a histogram with visualization. A Direct3D10 Capable device is required.
|
||||
|
||||
## Key Concepts
|
||||
|
||||
Graphics Interop, Texture
|
||||
|
||||
## Supported SM Architectures
|
||||
|
||||
[SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 8.9 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
|
||||
## Supported OSes
|
||||
|
||||
Windows
|
||||
|
||||
## Supported CPU Architecture
|
||||
|
||||
x86_64
|
||||
|
||||
## CUDA APIs involved
|
||||
|
||||
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
|
||||
cudaGraphicsUnmapResources, cudaMemcpy, cudaMalloc, cudaUnbindTexture, cudaGetErrorString, cudaGraphicsResourceGetMappedPointer, cudaGetLastError, cudaGraphicsMapResources, cudaGetDeviceCount, cudaGraphicsUnregisterResource, cudaGraphicsSubResourceGetMappedArray, cudaBindTextureToArray, cudaGetDeviceProperties
|
||||
|
||||
## Dependencies needed to build/run
|
||||
[DirectX](../../../README.md#directx)
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
|
||||
|
||||
## Build and Run
|
||||
|
||||
### Windows
|
||||
The Windows samples are built using the Visual Studio IDE. Solution files (.sln) are provided for each supported version of Visual Studio, using the format:
|
||||
```
|
||||
*_vs<version>.sln - for Visual Studio <version>
|
||||
```
|
||||
Each individual sample has its own set of solution files in its directory:
|
||||
|
||||
To build/examine all the samples at once, the complete solution files should be used. To build/examine a single sample, the individual sample solution files should be used.
|
||||
> **Note:** Some samples require that the Microsoft DirectX SDK (June 2010 or newer) be installed and that the VC++ directory paths are properly set up (**Tools > Options...**). Check DirectX Dependencies section for details."
|
||||
|
||||
## References (for more details)
|
||||
|
Binary file not shown.
@ -1,850 +0,0 @@
|
||||
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* This example demonstrates how to use the CUDA Direct3D bindings to fill
|
||||
* a vertex buffer with CUDA and use Direct3D to render the data.
|
||||
* Host code.
|
||||
*/
|
||||
|
||||
#pragma warning(disable : 4312)
|
||||
|
||||
#include <windows.h>
|
||||
#include <mmsystem.h>
|
||||
|
||||
// this header inclues all the necessary D3D10 includes
|
||||
#include <dynlink_d3d10.h>
|
||||
#include <cuda_runtime_api.h>
|
||||
#include <cuda_d3d10_interop.h>
|
||||
|
||||
// includes, project
|
||||
#include <rendercheck_d3d10.h>
|
||||
#include <helper_cuda.h>
|
||||
#include <helper_functions.h>
|
||||
|
||||
int g_iFrameToCompare = 10;
|
||||
|
||||
bool g_bDone = false;
|
||||
bool g_bPassed = true;
|
||||
|
||||
int *pArgc = NULL;
|
||||
char **pArgv = NULL;
|
||||
|
||||
#define MAX_EPSILON 10
|
||||
|
||||
static char *SDK_name = "simpleD3D10RenderTarget";
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Global variables
|
||||
//-----------------------------------------------------------------------------
|
||||
IDXGIAdapter *g_pCudaCapableAdapter = NULL; // Adapter to use
|
||||
ID3D10Device *g_pd3dDevice = NULL; // Our rendering device
|
||||
IDXGISwapChain *g_pSwapChain = NULL; // The swap chain of the window
|
||||
ID3D10RenderTargetView *g_pSwapChainRTV =
|
||||
NULL; // The Render target view on the swap chain ( used for clear)
|
||||
ID3D10RasterizerState *g_pRasterState = NULL;
|
||||
|
||||
struct Color {
|
||||
ID3D10Texture2D *pBuffer; // The color buffer
|
||||
ID3D10RenderTargetView
|
||||
*pBufferRTV; // The Render target view on the color buffer
|
||||
ID3D10ShaderResourceView
|
||||
*pBufferSRV; // The shader resource view on the color buffer
|
||||
cudaGraphicsResource *cudaResource; // resource of the Buffer on cuda side
|
||||
int pitch;
|
||||
cudaArray *pCudaArray; // the data in a cuda view
|
||||
} g_color;
|
||||
|
||||
struct Histogram {
|
||||
ID3D10Buffer *pBuffer; // Buffer to hold histogram
|
||||
ID3D10ShaderResourceView *pBufferSRV; // View on the histogram buffer
|
||||
cudaGraphicsResource *cudaResource; // resource of the Buffer on cuda side
|
||||
unsigned int *cudaBuffer; // staging buffer to allow cuda to write results
|
||||
// cudaArray* pCudaArray; // the data in a cuda view
|
||||
size_t size;
|
||||
} g_histogram;
|
||||
|
||||
ID3D10Effect *g_pDisplayEffect = NULL;
|
||||
ID3D10EffectTechnique *g_pDisplayTechnique = NULL;
|
||||
ID3D10EffectScalarVariable *g_pTime = NULL;
|
||||
|
||||
static const char g_displayEffectSrc[] =
|
||||
"float g_Time; \n"
|
||||
"uint2 g_vGrid = uint2(20,20); \n"
|
||||
"float4 g_vGridSize = float4(0.05f, 0.05f, 0.046f, 0.046f); \n"
|
||||
"\n"
|
||||
"struct Fragment{ \n"
|
||||
" float4 Pos : SV_POSITION;\n"
|
||||
" float2 Tex : TEXCOORD0; \n"
|
||||
" float4 Col : TEXCOORD1; };\n"
|
||||
"\n"
|
||||
"Fragment VS( uint instanceId : SV_InstanceID, uint vertexId : SV_VertexID "
|
||||
")\n"
|
||||
"{\n"
|
||||
" Fragment f;\n"
|
||||
" f.Tex = float2( 1.f*((vertexId == 1) || (vertexId == 3)), 1.f*( "
|
||||
"vertexId >= 2)); \n"
|
||||
" \n"
|
||||
" uint2 cellId = uint2(instanceId % g_vGrid.x, instanceId / "
|
||||
"g_vGrid.x);\n"
|
||||
" f.Pos = float4( g_vGridSize.xy*cellId + 0.5f*(g_vGridSize.xy - "
|
||||
"g_vGridSize.zw) + f.Tex * g_vGridSize.zw, 0, 1);\n"
|
||||
" f.Pos.xy = (f.Pos.xy*2.f - 1.f);\n"
|
||||
" \n"
|
||||
" f.Col = float4( ((g_vGrid.x-1.f) - cellId.x) / (g_vGrid.x-1.f), "
|
||||
"(cellId.x + (g_vGrid.y-1.f) - cellId.y) / (g_vGrid.x+g_vGrid.y-1.f), "
|
||||
"cellId.y / (g_vGrid.y-1.f), 1.f);\n"
|
||||
" f.Col *= float4( 0.5 + 0.5*sin(g_Time), 0.5 + "
|
||||
"0.5*sin(g_Time)*cos(g_Time), 0.5 + 0.5*cos(g_Time), 1.f);\n"
|
||||
" return f;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"float4 PS( Fragment f ) : SV_Target\n"
|
||||
"{\n"
|
||||
" return f.Col;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"technique10 Render\n"
|
||||
"{\n"
|
||||
" pass P0\n"
|
||||
" {\n"
|
||||
" SetVertexShader( CompileShader( vs_4_0, VS() ) );\n"
|
||||
" SetGeometryShader( NULL );\n"
|
||||
" SetPixelShader( CompileShader( ps_4_0, PS() ) );\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"\n";
|
||||
|
||||
ID3D10Effect *g_pCompositeEffect = NULL;
|
||||
ID3D10EffectTechnique *g_pCompositeTechnique = NULL;
|
||||
ID3D10EffectVectorVariable *g_pvQuadRect = NULL;
|
||||
ID3D10EffectScalarVariable *g_pUseCase = NULL;
|
||||
ID3D10EffectShaderResourceVariable *g_pTexture2D = NULL;
|
||||
ID3D10EffectShaderResourceVariable *g_pHistogram = NULL;
|
||||
|
||||
static const char g_compositeEffectSrc[] =
|
||||
"float4 g_vQuadRect; \n"
|
||||
"int g_UseCase; \n"
|
||||
"Texture2D g_Texture2D; \n"
|
||||
"Buffer<uint> g_Histogram; \n"
|
||||
"\n"
|
||||
"SamplerState samLinear{ \n"
|
||||
" Filter = MIN_MAG_LINEAR_MIP_POINT; \n"
|
||||
"};\n"
|
||||
"\n"
|
||||
"struct Fragment{ \n"
|
||||
" float4 Pos : SV_POSITION;\n"
|
||||
" float3 Tex : TEXCOORD0; \n"
|
||||
" float2 uv : TEXCOORD1; };\n"
|
||||
"\n"
|
||||
"Fragment VS( uint vertexId : SV_VertexID )\n"
|
||||
"{\n"
|
||||
" Fragment f;\n"
|
||||
" f.Tex = float3( 1.f*((vertexId == 1) || (vertexId == 3)), 1.f*( "
|
||||
"vertexId >= 2), 0.f); \n"
|
||||
" \n"
|
||||
" f.Pos = float4( g_vQuadRect.xy + f.Tex * g_vQuadRect.zw, 0, 1);\n"
|
||||
" \n"
|
||||
" f.uv = float2( f.Tex.x*255.f, f.Tex.y*50000.f ); \n"
|
||||
" return f;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"float4 PS( Fragment f ) : SV_Target\n"
|
||||
"{\n"
|
||||
" if (g_UseCase == 0) \n"
|
||||
" return g_Texture2D.Sample( samLinear, f.Tex.xy ); \n"
|
||||
" else if (g_UseCase == 1) { \n"
|
||||
" uint index = f.uv.x; \n"
|
||||
" float value = g_Histogram.Load( index ); \n"
|
||||
" //float value = index * 1000; \n"
|
||||
" float red = ( value >= f.uv.y ? (0.5f * f.uv.y / value) + 0.5f : "
|
||||
"0.f ); \n"
|
||||
" return float4(red, 0, 0, 1); \n"
|
||||
" } else return float4(f.Tex, 1);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"technique10 Render\n"
|
||||
"{\n"
|
||||
" pass P0\n"
|
||||
" {\n"
|
||||
" SetVertexShader( CompileShader( vs_4_0, VS() ) );\n"
|
||||
" SetGeometryShader( NULL );\n"
|
||||
" SetPixelShader( CompileShader( ps_4_0, PS() ) );\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"\n";
|
||||
|
||||
// testing/tracing function used pervasively in tests. if the condition is
|
||||
// unsatisfied then spew and fail the function immediately (doing no cleanup)
|
||||
#define AssertOrQuit(x) \
|
||||
if (!(x)) { \
|
||||
fprintf(stdout, "Assert unsatisfied in %s at %s:%d\n", __FUNCTION__, \
|
||||
__FILE__, __LINE__); \
|
||||
return 1; \
|
||||
}
|
||||
|
||||
const unsigned int g_WindowWidth = 800;
|
||||
const unsigned int g_WindowHeight = 800;
|
||||
const unsigned int g_HistogramSize = 256;
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Forward declarations
|
||||
//-----------------------------------------------------------------------------
|
||||
void runTest(int argc, char **argv, char *ref_file);
|
||||
void runCuda();
|
||||
HRESULT InitD3D(HWND hWnd);
|
||||
VOID Cleanup();
|
||||
VOID Render();
|
||||
LRESULT WINAPI MsgProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam);
|
||||
|
||||
// CUDA/D3D10 kernels
|
||||
extern "C" void checkCudaError();
|
||||
extern "C" void createHistogramTex(unsigned int *histogram, unsigned int width,
|
||||
unsigned int height, cudaArray *colorArray);
|
||||
|
||||
#define NAME_LEN 512
|
||||
|
||||
bool findCUDADevice() {
|
||||
int nGraphicsGPU = 0;
|
||||
int deviceCount = 0;
|
||||
bool bFoundGraphics = false;
|
||||
char devname[NAME_LEN];
|
||||
|
||||
// This function call returns 0 if there are no CUDA capable devices.
|
||||
cudaError_t error_id = cudaGetDeviceCount(&deviceCount);
|
||||
|
||||
if (error_id != cudaSuccess) {
|
||||
printf("cudaGetDeviceCount returned %d\n-> %s\n", (int)error_id,
|
||||
cudaGetErrorString(error_id));
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
if (deviceCount == 0) {
|
||||
printf("> There are no device(s) supporting CUDA\n");
|
||||
return false;
|
||||
} else {
|
||||
printf("> Found %d CUDA Capable Device(s)\n", deviceCount);
|
||||
}
|
||||
|
||||
// Get CUDA device properties
|
||||
cudaDeviceProp deviceProp;
|
||||
|
||||
for (int dev = 0; dev < deviceCount; ++dev) {
|
||||
cudaGetDeviceProperties(&deviceProp, dev);
|
||||
STRCPY(devname, NAME_LEN, deviceProp.name);
|
||||
printf("> GPU %d: %s\n", dev, devname);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool findDXDevice(char *dev_name) {
|
||||
HRESULT hr = S_OK;
|
||||
cudaError cuStatus;
|
||||
|
||||
// Iterate through the candidate adapters
|
||||
IDXGIFactory *pFactory;
|
||||
hr = sFnPtr_CreateDXGIFactory(__uuidof(IDXGIFactory), (void **)(&pFactory));
|
||||
|
||||
if (!SUCCEEDED(hr)) {
|
||||
printf("> No DXGI Factory created.\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
UINT adapter = 0;
|
||||
|
||||
for (; !g_pCudaCapableAdapter; ++adapter) {
|
||||
// Get a candidate DXGI adapter
|
||||
IDXGIAdapter *pAdapter = NULL;
|
||||
hr = pFactory->EnumAdapters(adapter, &pAdapter);
|
||||
|
||||
if (FAILED(hr)) {
|
||||
break; // no compatible adapters found
|
||||
}
|
||||
|
||||
// Query to see if there exists a corresponding compute device
|
||||
int cuDevice;
|
||||
cuStatus = cudaD3D10GetDevice(&cuDevice, pAdapter);
|
||||
printLastCudaError("cudaD3D10GetDevice failed"); // This prints and resets
|
||||
// the cudaError to
|
||||
// cudaSuccess
|
||||
|
||||
if (cudaSuccess == cuStatus) {
|
||||
// If so, mark it as the one against which to create our d3d10 device
|
||||
g_pCudaCapableAdapter = pAdapter;
|
||||
g_pCudaCapableAdapter->AddRef();
|
||||
}
|
||||
|
||||
pAdapter->Release();
|
||||
}
|
||||
|
||||
printf("> Found %d D3D10 Adapater(s).\n", (int)adapter);
|
||||
|
||||
pFactory->Release();
|
||||
|
||||
if (!g_pCudaCapableAdapter) {
|
||||
printf("> Found 0 D3D10 Adapater(s) /w Compute capability.\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
DXGI_ADAPTER_DESC adapterDesc;
|
||||
g_pCudaCapableAdapter->GetDesc(&adapterDesc);
|
||||
wcstombs(dev_name, adapterDesc.Description, 128);
|
||||
|
||||
printf("> Found 1 D3D10 Adapater(s) /w Compute capability.\n");
|
||||
printf("> %s\n", dev_name);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Program main
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
int main(int argc, char **argv) {
|
||||
char device_name[256];
|
||||
char *ref_file = NULL;
|
||||
|
||||
pArgc = &argc;
|
||||
pArgv = argv;
|
||||
|
||||
printf("[%s] - Starting...\n", SDK_name);
|
||||
|
||||
if (!findCUDADevice()) // Search for CUDA GPU
|
||||
{
|
||||
printf("> CUDA Device NOT found on \"%s\".. Exiting.\n", device_name);
|
||||
exit(EXIT_SUCCESS);
|
||||
}
|
||||
|
||||
if (!dynlinkLoadD3D10API()) // Search for D3D API (locate drivers, does not
|
||||
// mean device is found)
|
||||
{
|
||||
printf("> D3D10 API libraries NOT found.. Exiting.\n");
|
||||
dynlinkUnloadD3D10API();
|
||||
exit(EXIT_SUCCESS);
|
||||
}
|
||||
|
||||
if (!findDXDevice(device_name)) // Search for D3D Hardware Device
|
||||
{
|
||||
printf("> D3D10 Graphics Device NOT found.. Exiting.\n");
|
||||
dynlinkUnloadD3D10API();
|
||||
exit(EXIT_SUCCESS);
|
||||
}
|
||||
|
||||
// command line options
|
||||
if (argc > 1) {
|
||||
// automatied build testing harness
|
||||
if (checkCmdLineFlag(argc, (const char **)argv, "file"))
|
||||
getCmdLineArgumentString(argc, (const char **)argv, "file", &ref_file);
|
||||
}
|
||||
|
||||
// run D3D10/CUDA test
|
||||
runTest(argc, argv, ref_file);
|
||||
|
||||
//
|
||||
// and exit
|
||||
//
|
||||
printf("%s running on %s exiting...\n", SDK_name, device_name);
|
||||
|
||||
exit(g_bPassed ? EXIT_SUCCESS : EXIT_FAILURE);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Run a simple test for CUDA
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
void runTest(int argc, char **argv, char *ref_file) {
|
||||
// Register the window class
|
||||
WNDCLASSEX wc = {sizeof(WNDCLASSEX),
|
||||
CS_CLASSDC,
|
||||
MsgProc,
|
||||
0L,
|
||||
0L,
|
||||
GetModuleHandle(NULL),
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
"CUDA SDK",
|
||||
NULL};
|
||||
RegisterClassEx(&wc);
|
||||
|
||||
// Create the application's window
|
||||
HWND hWnd = CreateWindow(wc.lpszClassName, "CUDA/D3D10 RenderTarget InterOP",
|
||||
WS_OVERLAPPEDWINDOW, 100, 100, g_WindowWidth,
|
||||
g_WindowHeight, NULL, NULL, wc.hInstance, NULL);
|
||||
|
||||
// Initialize Direct3D
|
||||
if (SUCCEEDED(InitD3D(hWnd))) {
|
||||
// Initialize interoperability between CUDA and Direct3D
|
||||
// Register vertex buffer with CUDA
|
||||
cudaGraphicsD3D10RegisterResource(&g_histogram.cudaResource,
|
||||
g_histogram.pBuffer,
|
||||
cudaGraphicsMapFlagsNone);
|
||||
getLastCudaError("cudaGraphicsD3D10RegisterResource (g_pHistogram) failed");
|
||||
|
||||
// Register color buffer with CUDA
|
||||
cudaGraphicsD3D10RegisterResource(&g_color.cudaResource, g_color.pBuffer,
|
||||
cudaGraphicsMapFlagsNone);
|
||||
getLastCudaError(
|
||||
"cudaGraphicsD3D10RegisterResource (g_color.pBuffer) failed");
|
||||
|
||||
// Show the window
|
||||
ShowWindow(hWnd, SW_SHOWDEFAULT);
|
||||
UpdateWindow(hWnd);
|
||||
}
|
||||
|
||||
//
|
||||
// The main loop
|
||||
//
|
||||
while (false == g_bDone) {
|
||||
Render();
|
||||
|
||||
//
|
||||
// handle I/O
|
||||
//
|
||||
MSG msg;
|
||||
ZeroMemory(&msg, sizeof(msg));
|
||||
|
||||
while (msg.message != WM_QUIT) {
|
||||
if (PeekMessage(&msg, NULL, 0U, 0U, PM_REMOVE)) {
|
||||
TranslateMessage(&msg);
|
||||
DispatchMessage(&msg);
|
||||
} else {
|
||||
Render();
|
||||
|
||||
if (ref_file) {
|
||||
for (int count = 0; count < g_iFrameToCompare; count++) {
|
||||
Render();
|
||||
}
|
||||
|
||||
const char *cur_image_path = "simpleD3D10RenderTarget.ppm";
|
||||
|
||||
// Save a reference of our current test run image
|
||||
CheckRenderD3D10::ActiveRenderTargetToPPM(g_pd3dDevice,
|
||||
cur_image_path);
|
||||
|
||||
// compare to offical reference image, printing PASS or FAIL.
|
||||
g_bPassed = CheckRenderD3D10::PPMvsPPM(cur_image_path, ref_file,
|
||||
argv[0], MAX_EPSILON, 0.15f);
|
||||
|
||||
g_bDone = true;
|
||||
|
||||
Cleanup();
|
||||
|
||||
PostQuitMessage(0);
|
||||
} else {
|
||||
g_bPassed = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Release D3D Library (after message loop)
|
||||
dynlinkUnloadD3D10API();
|
||||
|
||||
// Unregister windows class
|
||||
UnregisterClass(wc.lpszClassName, wc.hInstance);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Run the Cuda part of the computation
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
void runCuda() {
|
||||
cudaStream_t stream = 0;
|
||||
const int nbResources = 2;
|
||||
cudaGraphicsResource *ppResources[nbResources] = {
|
||||
g_histogram.cudaResource, g_color.cudaResource,
|
||||
};
|
||||
// Map resources for Cuda
|
||||
checkCudaErrors(cudaGraphicsMapResources(nbResources, ppResources, stream));
|
||||
getLastCudaError("cudaGraphicsMapResources(2) failed");
|
||||
// Get pointers
|
||||
checkCudaErrors(cudaGraphicsResourceGetMappedPointer(
|
||||
(void **)&g_histogram.cudaBuffer, &g_histogram.size,
|
||||
g_histogram.cudaResource));
|
||||
getLastCudaError(
|
||||
"cudaGraphicsResourceGetMappedPointer (g_color.pBuffer) failed");
|
||||
cudaGraphicsSubResourceGetMappedArray(&g_color.pCudaArray,
|
||||
g_color.cudaResource, 0, 0);
|
||||
getLastCudaError(
|
||||
"cudaGraphicsSubResourceGetMappedArray (g_color.pBuffer) failed");
|
||||
|
||||
// Execute kernel
|
||||
createHistogramTex(g_histogram.cudaBuffer, g_WindowWidth, g_WindowHeight,
|
||||
g_color.pCudaArray);
|
||||
checkCudaError();
|
||||
//
|
||||
// unmap the resources
|
||||
//
|
||||
checkCudaErrors(cudaGraphicsUnmapResources(nbResources, ppResources, stream));
|
||||
getLastCudaError("cudaGraphicsUnmapResources(2) failed");
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Name: InitD3D()
|
||||
// Desc: Initializes Direct3D
|
||||
//-----------------------------------------------------------------------------
|
||||
HRESULT InitD3D(HWND hWnd) {
|
||||
// Set up the structure used to create the device and swapchain
|
||||
DXGI_SWAP_CHAIN_DESC sd;
|
||||
ZeroMemory(&sd, sizeof(sd));
|
||||
sd.BufferCount = 1;
|
||||
sd.BufferDesc.Width = g_WindowWidth;
|
||||
sd.BufferDesc.Height = g_WindowHeight;
|
||||
sd.BufferDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
|
||||
sd.BufferDesc.RefreshRate.Numerator = 60;
|
||||
sd.BufferDesc.RefreshRate.Denominator = 1;
|
||||
sd.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT;
|
||||
sd.OutputWindow = hWnd;
|
||||
sd.SampleDesc.Count = 1;
|
||||
sd.SampleDesc.Quality = 0;
|
||||
sd.Windowed = TRUE;
|
||||
|
||||
// Create device and swapchain
|
||||
HRESULT hr = sFnPtr_D3D10CreateDeviceAndSwapChain(
|
||||
g_pCudaCapableAdapter, D3D10_DRIVER_TYPE_HARDWARE, NULL,
|
||||
0, // D3D10_CREATE_DEVICE_DEBUG,
|
||||
D3D10_SDK_VERSION, &sd, &g_pSwapChain, &g_pd3dDevice);
|
||||
AssertOrQuit(SUCCEEDED(hr));
|
||||
|
||||
g_pCudaCapableAdapter->Release();
|
||||
|
||||
// Create a render target view of the swapchain
|
||||
ID3D10Texture2D *pBuffer;
|
||||
hr =
|
||||
g_pSwapChain->GetBuffer(0, __uuidof(ID3D10Texture2D), (LPVOID *)&pBuffer);
|
||||
AssertOrQuit(SUCCEEDED(hr));
|
||||
|
||||
hr = g_pd3dDevice->CreateRenderTargetView(pBuffer, NULL, &g_pSwapChainRTV);
|
||||
AssertOrQuit(SUCCEEDED(hr));
|
||||
pBuffer->Release();
|
||||
|
||||
// Create a color buffer, corresponding render target view and shader resource
|
||||
// view
|
||||
D3D10_TEXTURE2D_DESC tex2Ddesc;
|
||||
ZeroMemory(&tex2Ddesc, sizeof(D3D10_TEXTURE2D_DESC));
|
||||
tex2Ddesc.Width = g_WindowWidth;
|
||||
tex2Ddesc.Height = g_WindowHeight;
|
||||
tex2Ddesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
|
||||
tex2Ddesc.MipLevels = 1;
|
||||
tex2Ddesc.ArraySize = 1;
|
||||
tex2Ddesc.SampleDesc.Count = 1;
|
||||
tex2Ddesc.Usage = D3D10_USAGE_DEFAULT;
|
||||
tex2Ddesc.BindFlags = D3D10_BIND_RENDER_TARGET | D3D10_BIND_SHADER_RESOURCE;
|
||||
|
||||
hr = g_pd3dDevice->CreateTexture2D(&tex2Ddesc, NULL, &g_color.pBuffer);
|
||||
AssertOrQuit(SUCCEEDED(hr));
|
||||
|
||||
hr = g_pd3dDevice->CreateShaderResourceView(g_color.pBuffer, NULL,
|
||||
&g_color.pBufferSRV);
|
||||
AssertOrQuit(SUCCEEDED(hr));
|
||||
|
||||
hr = g_pd3dDevice->CreateRenderTargetView(g_color.pBuffer, NULL,
|
||||
&g_color.pBufferRTV);
|
||||
AssertOrQuit(SUCCEEDED(hr));
|
||||
|
||||
// Create a buffer which will contain the resulting histogram and the SRV to
|
||||
// plug it
|
||||
D3D10_BUFFER_DESC bufferDesc;
|
||||
bufferDesc.Usage = D3D10_USAGE_DEFAULT;
|
||||
// NOTE: allocation of more than what is needed to display in the shader
|
||||
// but this 64 factor is required for CUDA to work with this buffer (see
|
||||
// BLOCK_N in .cu code...)
|
||||
bufferDesc.ByteWidth =
|
||||
sizeof(unsigned int) * g_HistogramSize * 64 /*BLOCK_N*/;
|
||||
bufferDesc.BindFlags = D3D10_BIND_SHADER_RESOURCE;
|
||||
bufferDesc.CPUAccessFlags = 0;
|
||||
bufferDesc.MiscFlags = 0;
|
||||
// useless values... we could remove this...
|
||||
unsigned int values[256 * 64];
|
||||
|
||||
for (int i = 0; i < 256 * 64; i++) {
|
||||
values[i] = i;
|
||||
}
|
||||
|
||||
D3D10_SUBRESOURCE_DATA data;
|
||||
data.pSysMem = values;
|
||||
data.SysMemPitch = 0;
|
||||
data.SysMemSlicePitch = 0;
|
||||
|
||||
hr = g_pd3dDevice->CreateBuffer(&bufferDesc, &data, &g_histogram.pBuffer);
|
||||
AssertOrQuit(SUCCEEDED(hr));
|
||||
|
||||
D3D10_SHADER_RESOURCE_VIEW_DESC bufferSRVDesc;
|
||||
bufferSRVDesc.Format = DXGI_FORMAT_R32_UINT;
|
||||
bufferSRVDesc.ViewDimension = D3D10_SRV_DIMENSION_BUFFER;
|
||||
bufferSRVDesc.Buffer.ElementOffset = 0;
|
||||
bufferSRVDesc.Buffer.ElementWidth =
|
||||
g_HistogramSize; // 4*sizeof(unsigned int);
|
||||
|
||||
hr = g_pd3dDevice->CreateShaderResourceView(
|
||||
g_histogram.pBuffer, &bufferSRVDesc, &g_histogram.pBufferSRV);
|
||||
AssertOrQuit(SUCCEEDED(hr));
|
||||
// Create the equivalent as a cuda staging buffer that we'll use to write from
|
||||
// Cuda. Then we'll copy it to the texture
|
||||
// cudaMalloc(g_histogram.cudaBuffer, sizeof(float) * g_HistogramSize;
|
||||
// getLastCudaError("cudaMallocPitch (g_histogram) failed");
|
||||
|
||||
// Setup the viewport
|
||||
D3D10_VIEWPORT vp;
|
||||
vp.Width = g_WindowWidth;
|
||||
vp.Height = g_WindowHeight;
|
||||
vp.MinDepth = 0.0f;
|
||||
vp.MaxDepth = 1.0f;
|
||||
vp.TopLeftX = 0;
|
||||
vp.TopLeftY = 0;
|
||||
g_pd3dDevice->RSSetViewports(1, &vp);
|
||||
|
||||
// Setup the effect
|
||||
{
|
||||
ID3D10Blob *pErrors = NULL;
|
||||
ID3D10Blob *pCompiledEffect;
|
||||
hr = sFnPtr_D3D10CompileEffectFromMemory((void *)g_displayEffectSrc,
|
||||
sizeof(g_displayEffectSrc), NULL,
|
||||
NULL, // pDefines
|
||||
NULL, // pIncludes
|
||||
0, // HLSL flags
|
||||
0, // FXFlags
|
||||
&pCompiledEffect, &pErrors);
|
||||
|
||||
if (pErrors) {
|
||||
LPVOID l_pError = NULL;
|
||||
l_pError = pErrors->GetBufferPointer(); // then cast to a char* to see it
|
||||
// in the locals window
|
||||
fprintf(stdout, "Compilation error: \n %s", (char *)l_pError);
|
||||
}
|
||||
|
||||
AssertOrQuit(SUCCEEDED(hr));
|
||||
|
||||
hr = sFnPtr_D3D10CreateEffectFromMemory(
|
||||
pCompiledEffect->GetBufferPointer(), pCompiledEffect->GetBufferSize(),
|
||||
0, // FXFlags
|
||||
g_pd3dDevice, NULL, &g_pDisplayEffect);
|
||||
pCompiledEffect->Release();
|
||||
|
||||
g_pDisplayTechnique = g_pDisplayEffect->GetTechniqueByName("Render");
|
||||
|
||||
g_pTime = g_pDisplayEffect->GetVariableByName("g_Time")->AsScalar();
|
||||
}
|
||||
|
||||
// Setup the effect
|
||||
{
|
||||
ID3D10Blob *pCompiledEffect;
|
||||
ID3D10Blob *pErrors = NULL;
|
||||
hr = sFnPtr_D3D10CompileEffectFromMemory((void *)g_compositeEffectSrc,
|
||||
sizeof(g_compositeEffectSrc), NULL,
|
||||
NULL, // pDefines
|
||||
NULL, // pIncludes
|
||||
0, // HLSL flags
|
||||
0, // FXFlags
|
||||
&pCompiledEffect, &pErrors);
|
||||
|
||||
if (pErrors) {
|
||||
LPVOID l_pError = NULL;
|
||||
l_pError = pErrors->GetBufferPointer(); // then cast to a char* to see it
|
||||
// in the locals window
|
||||
fprintf(stdout, "Compilation error: \n %s", (char *)l_pError);
|
||||
}
|
||||
|
||||
AssertOrQuit(SUCCEEDED(hr));
|
||||
|
||||
hr = sFnPtr_D3D10CreateEffectFromMemory(
|
||||
pCompiledEffect->GetBufferPointer(), pCompiledEffect->GetBufferSize(),
|
||||
0, // FXFlags
|
||||
g_pd3dDevice, NULL, &g_pCompositeEffect);
|
||||
pCompiledEffect->Release();
|
||||
|
||||
g_pCompositeTechnique = g_pCompositeEffect->GetTechniqueByName("Render");
|
||||
|
||||
g_pvQuadRect =
|
||||
g_pCompositeEffect->GetVariableByName("g_vQuadRect")->AsVector();
|
||||
g_pUseCase = g_pCompositeEffect->GetVariableByName("g_UseCase")->AsScalar();
|
||||
|
||||
g_pTexture2D = g_pCompositeEffect->GetVariableByName("g_Texture2D")
|
||||
->AsShaderResource();
|
||||
g_pTexture2D->SetResource(g_color.pBufferSRV);
|
||||
|
||||
g_pHistogram = g_pCompositeEffect->GetVariableByName("g_Histogram")
|
||||
->AsShaderResource();
|
||||
g_pHistogram->SetResource(g_histogram.pBufferSRV);
|
||||
}
|
||||
|
||||
D3D10_RASTERIZER_DESC rasterizerState;
|
||||
rasterizerState.FillMode = D3D10_FILL_SOLID;
|
||||
rasterizerState.CullMode = D3D10_CULL_FRONT;
|
||||
rasterizerState.FrontCounterClockwise = false;
|
||||
rasterizerState.DepthBias = false;
|
||||
rasterizerState.DepthBiasClamp = 0;
|
||||
rasterizerState.SlopeScaledDepthBias = 0;
|
||||
rasterizerState.DepthClipEnable = false;
|
||||
rasterizerState.ScissorEnable = false;
|
||||
rasterizerState.MultisampleEnable = false;
|
||||
rasterizerState.AntialiasedLineEnable = false;
|
||||
g_pd3dDevice->CreateRasterizerState(&rasterizerState, &g_pRasterState);
|
||||
g_pd3dDevice->RSSetState(g_pRasterState);
|
||||
|
||||
return S_OK;
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Name: Cleanup()
|
||||
// Desc: Releases all previously initialized objects
|
||||
//-----------------------------------------------------------------------------
|
||||
VOID Cleanup() {
|
||||
if (g_histogram.pBuffer != NULL) {
|
||||
// Unregister vertex buffer
|
||||
cudaGraphicsUnregisterResource(g_histogram.cudaResource);
|
||||
getLastCudaError("cudaGraphicsUnregisterResource failed");
|
||||
g_histogram.pBuffer->Release();
|
||||
}
|
||||
|
||||
if (g_histogram.pBufferSRV != NULL) {
|
||||
g_histogram.pBufferSRV->Release();
|
||||
}
|
||||
|
||||
if (g_pDisplayEffect != NULL) {
|
||||
g_pDisplayEffect->Release();
|
||||
}
|
||||
|
||||
if (g_pCompositeEffect != NULL) {
|
||||
g_pCompositeEffect->Release();
|
||||
}
|
||||
|
||||
if (g_color.pBufferSRV != NULL) {
|
||||
g_color.pBufferSRV->Release();
|
||||
}
|
||||
|
||||
if (g_color.pBufferRTV != NULL) {
|
||||
g_color.pBufferRTV->Release();
|
||||
}
|
||||
|
||||
if (g_color.pBuffer != NULL) {
|
||||
// Unregister vertex buffer
|
||||
cudaGraphicsUnregisterResource(g_color.cudaResource);
|
||||
getLastCudaError("cudaD3D10UnregisterResource failed");
|
||||
g_color.pBuffer->Release();
|
||||
}
|
||||
|
||||
if (g_pRasterState != NULL) {
|
||||
g_pRasterState->Release();
|
||||
}
|
||||
|
||||
if (g_pSwapChainRTV != NULL) {
|
||||
g_pSwapChainRTV->Release();
|
||||
}
|
||||
|
||||
if (g_pSwapChain != NULL) {
|
||||
g_pSwapChain->Release();
|
||||
}
|
||||
|
||||
if (g_pd3dDevice != NULL) {
|
||||
g_pd3dDevice->Release();
|
||||
}
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Name: Render()
|
||||
// Desc: Draws the scene
|
||||
//-----------------------------------------------------------------------------
|
||||
VOID Render() {
|
||||
g_pd3dDevice->RSSetState(g_pRasterState);
|
||||
|
||||
// Draw frame
|
||||
{
|
||||
static float time = 0.f;
|
||||
time += 0.001f;
|
||||
g_pTime->SetFloat(time);
|
||||
|
||||
// Clear the Color to a black color
|
||||
float ClearColor[4] = {0.f, 0.1f, 0.1f, 1.f};
|
||||
g_pd3dDevice->ClearRenderTargetView(g_color.pBufferRTV, ClearColor);
|
||||
g_pd3dDevice->OMSetRenderTargets(1, &g_color.pBufferRTV, NULL);
|
||||
|
||||
g_pd3dDevice->IASetInputLayout(0);
|
||||
g_pd3dDevice->IASetPrimitiveTopology(
|
||||
D3D10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
|
||||
|
||||
g_pDisplayTechnique->GetPassByIndex(0)->Apply(0);
|
||||
g_pd3dDevice->DrawInstanced(4, 400, 0, 0);
|
||||
}
|
||||
|
||||
// Run CUDA to compute the histogram
|
||||
runCuda();
|
||||
|
||||
// draw the 2d texture
|
||||
{
|
||||
// Clear the Color to a black color
|
||||
float ClearColor[4] = {0, 0, 0, 1.f};
|
||||
g_pd3dDevice->ClearRenderTargetView(g_pSwapChainRTV, ClearColor);
|
||||
g_pd3dDevice->OMSetRenderTargets(1, &g_pSwapChainRTV, NULL);
|
||||
|
||||
g_pd3dDevice->IASetInputLayout(0);
|
||||
g_pd3dDevice->IASetPrimitiveTopology(
|
||||
D3D10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
|
||||
|
||||
g_pTexture2D->SetResource(g_color.pBufferSRV);
|
||||
g_pHistogram->SetResource(g_histogram.pBufferSRV);
|
||||
|
||||
g_pUseCase->SetInt(0);
|
||||
float quadRect1[4] = {-1.0f, -0.8f, 2.0f, 1.8f};
|
||||
g_pvQuadRect->SetFloatVector((float *)&quadRect1);
|
||||
|
||||
g_pCompositeTechnique->GetPassByIndex(0)->Apply(0);
|
||||
g_pd3dDevice->Draw(4, 0);
|
||||
|
||||
g_pUseCase->SetInt(1);
|
||||
float quadRect2[4] = {-0.8f, -0.99f, 1.6f, 0.19f};
|
||||
g_pvQuadRect->SetFloatVector((float *)&quadRect2);
|
||||
|
||||
g_pCompositeTechnique->GetPassByIndex(0)->Apply(0);
|
||||
g_pd3dDevice->Draw(4, 0);
|
||||
|
||||
g_pTexture2D->SetResource(NULL);
|
||||
g_pHistogram->SetResource(NULL);
|
||||
g_pCompositeTechnique->GetPassByIndex(0)->Apply(0);
|
||||
}
|
||||
|
||||
// Present the backbuffer contents to the display
|
||||
g_pSwapChain->Present(0, 0);
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Name: MsgProc()
|
||||
// Desc: The window's message handler
|
||||
//-----------------------------------------------------------------------------
|
||||
LRESULT WINAPI MsgProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam) {
|
||||
switch (msg) {
|
||||
case WM_DESTROY:
|
||||
case WM_KEYDOWN:
|
||||
if (msg != WM_KEYDOWN || wParam == 27) {
|
||||
g_bDone = true;
|
||||
Cleanup();
|
||||
PostQuitMessage(0);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
return DefWindowProc(hWnd, msg, wParam, lParam);
|
||||
}
|
@ -1,223 +0,0 @@
|
||||
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* This example demonstrates how to use the CUDA Direct3D bindings with the
|
||||
* runtime API.
|
||||
* Device code.
|
||||
*/
|
||||
|
||||
#ifndef SIMPLED3D10RENDERTARGET_KERNEL_CU
|
||||
#define SIMPLED3D10RENDERTARGET_KERNEL_CU
|
||||
|
||||
// includes, C string library
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
// includes, cuda
|
||||
#include <cuda.h>
|
||||
#include <builtin_types.h>
|
||||
#include <cuda_runtime_api.h>
|
||||
|
||||
// includes, project
|
||||
#include <helper_cuda.h> // includes cuda.h and cuda_runtime_api.h
|
||||
//#include "checkCudaErrors"
|
||||
|
||||
#define BIN_COUNT 256
|
||||
#define HISTOGRAM_SIZE (BIN_COUNT * sizeof(unsigned int))
|
||||
|
||||
texture<uchar4, 2, cudaReadModeElementType> colorTex;
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// GPU-specific definitions
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Fast mul on G8x / G9x / G100
|
||||
#define IMUL(a, b) __mul24(a, b)
|
||||
|
||||
// Machine warp size
|
||||
// G80's warp size is 32 threads
|
||||
#define WARP_LOG2SIZE 5
|
||||
|
||||
// Warps in thread block for histogram256Kernel()
|
||||
#define WARP_N 6
|
||||
|
||||
// Corresponding thread block size in threads for histogram256Kernel()
|
||||
#define THREAD_N (WARP_N << WARP_LOG2SIZE)
|
||||
|
||||
// Total histogram size (in counters) per thread block for histogram256Kernel()
|
||||
#define BLOCK_MEMORY (WARP_N * BIN_COUNT)
|
||||
|
||||
// Thread block count for histogram256Kernel()
|
||||
#define BLOCK_N 64
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// If threadPos == threadIdx.x, there are always 4-way bank conflicts,
|
||||
// since each group of 16 threads (half-warp) accesses different bytes,
|
||||
// but only within 4 shared memory banks. Having shuffled bits of threadIdx.x
|
||||
// as in histogram64GPU(), each half-warp accesses different shared memory banks
|
||||
// avoiding any bank conflicts at all.
|
||||
// Refer to the supplied whitepaper for detailed explanations.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
__device__ inline void addData256(volatile unsigned int *s_WarpHist,
|
||||
unsigned int data, unsigned int threadTag) {
|
||||
unsigned int count;
|
||||
|
||||
do {
|
||||
count = s_WarpHist[data] & 0x07FFFFFFU;
|
||||
count = threadTag | (count + 1);
|
||||
s_WarpHist[data] = count;
|
||||
} while (s_WarpHist[data] != count);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Main histogram calculation kernel
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
static __global__ void histogramTex256Kernel(unsigned int *d_Result,
|
||||
unsigned int width,
|
||||
unsigned int height, int dataN) {
|
||||
// Current global thread index
|
||||
const int globalTid = IMUL(blockIdx.x, blockDim.x) + threadIdx.x;
|
||||
// Total number of threads in the compute grid
|
||||
const int numThreads = IMUL(blockDim.x, gridDim.x);
|
||||
|
||||
// Thread tag for addData256()
|
||||
// WARP_LOG2SIZE higher bits of counter values are tagged
|
||||
// by lower WARP_LOG2SIZE threadID bits
|
||||
const unsigned int threadTag = threadIdx.x << (32 - WARP_LOG2SIZE);
|
||||
|
||||
// Shared memory storage for each warp
|
||||
volatile __shared__ unsigned int s_Hist[BLOCK_MEMORY];
|
||||
|
||||
// Current warp shared memory base
|
||||
const int warpBase = (threadIdx.x >> WARP_LOG2SIZE) * BIN_COUNT;
|
||||
|
||||
// Clear shared memory buffer for current thread block before processing
|
||||
for (int pos = threadIdx.x; pos < BLOCK_MEMORY; pos += blockDim.x)
|
||||
s_Hist[pos] = 0;
|
||||
|
||||
// Cycle through the entire data set, update subhistograms for each warp
|
||||
__syncthreads();
|
||||
|
||||
for (int pos = globalTid; pos < dataN; pos += numThreads) {
|
||||
// NOTE: check this... Not sure this is what needs to be done
|
||||
int py = pos / width;
|
||||
int px = pos - (py * width);
|
||||
uchar4 data4 = tex2D(colorTex, px, py);
|
||||
|
||||
addData256(s_Hist + warpBase, (data4.x), threadTag);
|
||||
addData256(s_Hist + warpBase, (data4.y), threadTag);
|
||||
addData256(s_Hist + warpBase, (data4.z), threadTag);
|
||||
addData256(s_Hist + warpBase, (data4.w), threadTag);
|
||||
}
|
||||
|
||||
__syncthreads();
|
||||
|
||||
// Merge per-warp histograms into per-block and write to global memory
|
||||
for (int pos = threadIdx.x; pos < BIN_COUNT; pos += blockDim.x) {
|
||||
unsigned int sum = 0;
|
||||
|
||||
for (int base = 0; base < BLOCK_MEMORY; base += BIN_COUNT)
|
||||
sum += s_Hist[base + pos] & 0x07FFFFFFU;
|
||||
|
||||
d_Result[blockIdx.x * BIN_COUNT + pos] = sum;
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Merge BLOCK_N subhistograms of BIN_COUNT bins into final histogram
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// gridDim.x == BIN_COUNT
|
||||
// blockDim.x == BLOCK_N
|
||||
// blockIdx.x == bin counter processed by current block
|
||||
// threadIdx.x == subhistogram index
|
||||
static __global__ void mergeHistogramTex256Kernel(unsigned int *d_Result) {
|
||||
__shared__ unsigned int data[BLOCK_N];
|
||||
|
||||
// Reads are uncoalesced, but this final stage takes
|
||||
// only a fraction of total processing time
|
||||
data[threadIdx.x] = d_Result[threadIdx.x * BIN_COUNT + blockIdx.x];
|
||||
|
||||
for (int stride = BLOCK_N / 2; stride > 0; stride >>= 1) {
|
||||
__syncthreads();
|
||||
|
||||
if (threadIdx.x < stride) data[threadIdx.x] += data[threadIdx.x + stride];
|
||||
}
|
||||
|
||||
if (threadIdx.x == 0) d_Result[blockIdx.x] = data[0];
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Host interface to GPU histogram
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
extern "C" void checkCudaError() {
|
||||
cudaError_t err = cudaGetLastError();
|
||||
|
||||
if (cudaSuccess != err) {
|
||||
fprintf(stderr, "Cuda error: %s.\n", cudaGetErrorString(err));
|
||||
exit(2);
|
||||
}
|
||||
}
|
||||
|
||||
// Maximum block count for histogram64kernel()
|
||||
// Limits input data size to 756MB
|
||||
// const int MAX_BLOCK_N = 16384;
|
||||
|
||||
// Internal memory allocation
|
||||
// const int BLOCK_N2 = 32;
|
||||
|
||||
extern "C" void createHistogramTex(unsigned int *h_Result, unsigned int width,
|
||||
unsigned int height, cudaArray *colorArray) {
|
||||
cudaBindTextureToArray(colorTex, colorArray);
|
||||
checkCudaError();
|
||||
|
||||
histogramTex256Kernel<<<BLOCK_N, THREAD_N>>>(h_Result, width, height,
|
||||
width * height / 4);
|
||||
checkCudaError();
|
||||
|
||||
mergeHistogramTex256Kernel<<<BIN_COUNT, BLOCK_N>>>(h_Result);
|
||||
checkCudaError();
|
||||
|
||||
cudaUnbindTexture(colorTex);
|
||||
checkCudaError();
|
||||
|
||||
#if 0
|
||||
// Dummy fill test
|
||||
unsigned int toto[256];
|
||||
|
||||
for (int i=0; i<256; i++)
|
||||
{
|
||||
toto[i] = i * 100;
|
||||
}
|
||||
cudaMemcpy(h_Result, toto, HISTOGRAM_SIZE, cudaMemcpyHostToDevice);
|
||||
#endif
|
||||
checkCudaError();
|
||||
}
|
||||
|
||||
extern "C" void bindArrayToTexture(cudaArray *pArray) {}
|
||||
|
||||
#endif // #ifndef SIMPLED3D10RENDERTARGET_KERNEL_CU
|
@ -1,46 +0,0 @@
|
||||
################################################################################
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met:
|
||||
# * Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
# contributors may be used to endorse or promote products derived
|
||||
# from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
################################################################################
|
||||
#
|
||||
# Makefile project only supported on Mac OS X and Linux Platforms)
|
||||
#
|
||||
################################################################################
|
||||
|
||||
# Target rules
|
||||
all: build
|
||||
|
||||
build:
|
||||
$(info >>> WARNING - simpleD3D10Texture is not supported on Linux - waiving sample <<<)
|
||||
|
||||
run: build
|
||||
|
||||
testrun: build
|
||||
|
||||
clean:
|
||||
|
||||
clobber: clean
|
@ -1,49 +0,0 @@
|
||||
# simpleD3D10Texture - Simple D3D10 Texture
|
||||
|
||||
## Description
|
||||
|
||||
Simple program which demonstrates how to interoperate CUDA with Direct3D10 Texture. The program creates a number of D3D10 Textures (2D, 3D, and CubeMap) which are generated from CUDA kernels. Direct3D then renders the results on the screen. A Direct3D10 Capable device is required.
|
||||
|
||||
## Key Concepts
|
||||
|
||||
Graphics Interop, Texture
|
||||
|
||||
## Supported SM Architectures
|
||||
|
||||
[SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 8.9 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
|
||||
## Supported OSes
|
||||
|
||||
Windows
|
||||
|
||||
## Supported CPU Architecture
|
||||
|
||||
x86_64
|
||||
|
||||
## CUDA APIs involved
|
||||
|
||||
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
|
||||
cudaGraphicsUnmapResources, cudaMalloc, cudaMallocPitch, cudaGetErrorString, cudaFree, cudaGetLastError, cudaGraphicsMapResources, cudaGetDeviceCount, cudaMemset, cudaGraphicsUnregisterResource, cudaGraphicsSubResourceGetMappedArray, cudaGetDeviceProperties
|
||||
|
||||
## Dependencies needed to build/run
|
||||
[DirectX](../../../README.md#directx)
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
|
||||
|
||||
## Build and Run
|
||||
|
||||
### Windows
|
||||
The Windows samples are built using the Visual Studio IDE. Solution files (.sln) are provided for each supported version of Visual Studio, using the format:
|
||||
```
|
||||
*_vs<version>.sln - for Visual Studio <version>
|
||||
```
|
||||
Each individual sample has its own set of solution files in its directory:
|
||||
|
||||
To build/examine all the samples at once, the complete solution files should be used. To build/examine a single sample, the individual sample solution files should be used.
|
||||
> **Note:** Some samples require that the Microsoft DirectX SDK (June 2010 or newer) be installed and that the VC++ directory paths are properly set up (**Tools > Options...**). Check DirectX Dependencies section for details."
|
||||
|
||||
## References (for more details)
|
||||
|
File diff suppressed because one or more lines are too long
@ -1,970 +0,0 @@
|
||||
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* This example demonstrates how to use the CUDA Direct3D bindings to
|
||||
* transfer data between CUDA and DX9 2D, CubeMap, and Volume Textures.
|
||||
*/
|
||||
|
||||
#pragma warning(disable : 4312)
|
||||
|
||||
#include <windows.h>
|
||||
#include <mmsystem.h>
|
||||
|
||||
// This header inclues all the necessary D3D10 and CUDA includes
|
||||
#include <dynlink_d3d10.h>
|
||||
#include <cuda_runtime_api.h>
|
||||
#include <cuda_d3d10_interop.h>
|
||||
|
||||
// includes, project
|
||||
#include <rendercheck_d3d10.h>
|
||||
#include <helper_cuda.h> // helper functions for CUDA error checking and initialization
|
||||
|
||||
#define MAX_EPSILON 10
|
||||
|
||||
static char *SDK_name = "simpleD3D10Texture";
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Global variables
|
||||
//-----------------------------------------------------------------------------
|
||||
IDXGIAdapter *g_pCudaCapableAdapter = NULL; // Adapter to use
|
||||
ID3D10Device *g_pd3dDevice = NULL; // Our rendering device
|
||||
IDXGISwapChain *g_pSwapChain = NULL; // The swap chain of the window
|
||||
ID3D10RenderTargetView *g_pSwapChainRTV =
|
||||
NULL; // The Render target view on the swap chain ( used for clear)
|
||||
ID3D10RasterizerState *g_pRasterState = NULL;
|
||||
|
||||
ID3D10InputLayout *g_pInputLayout = NULL;
|
||||
ID3D10Effect *g_pSimpleEffect = NULL;
|
||||
ID3D10EffectTechnique *g_pSimpleTechnique = NULL;
|
||||
ID3D10EffectVectorVariable *g_pvQuadRect = NULL;
|
||||
ID3D10EffectScalarVariable *g_pUseCase = NULL;
|
||||
ID3D10EffectShaderResourceVariable *g_pTexture2D = NULL;
|
||||
ID3D10EffectShaderResourceVariable *g_pTexture3D = NULL;
|
||||
ID3D10EffectShaderResourceVariable *g_pTextureCube = NULL;
|
||||
|
||||
static const char g_simpleEffectSrc[] =
|
||||
"float4 g_vQuadRect; \n"
|
||||
"int g_UseCase; \n"
|
||||
"Texture2D g_Texture2D; \n"
|
||||
"Texture3D g_Texture3D; \n"
|
||||
"TextureCube g_TextureCube; \n"
|
||||
"\n"
|
||||
"SamplerState samLinear{ \n"
|
||||
" Filter = MIN_MAG_LINEAR_MIP_POINT; \n"
|
||||
"};\n"
|
||||
"\n"
|
||||
"struct Fragment{ \n"
|
||||
" float4 Pos : SV_POSITION;\n"
|
||||
" float3 Tex : TEXCOORD0; };\n"
|
||||
"\n"
|
||||
"Fragment VS( uint vertexId : SV_VertexID )\n"
|
||||
"{\n"
|
||||
" Fragment f;\n"
|
||||
" f.Tex = float3( 0.f, 0.f, 0.f); \n"
|
||||
" if (vertexId == 1) f.Tex.x = 1.f; \n"
|
||||
" else if (vertexId == 2) f.Tex.y = 1.f; \n"
|
||||
" else if (vertexId == 3) f.Tex.xy = float2(1.f, 1.f); \n"
|
||||
" \n"
|
||||
" f.Pos = float4( g_vQuadRect.xy + f.Tex * g_vQuadRect.zw, 0, 1);\n"
|
||||
" \n"
|
||||
" if (g_UseCase == 1) { \n"
|
||||
" if (vertexId == 1) f.Tex.z = 0.5f; \n"
|
||||
" else if (vertexId == 2) f.Tex.z = 0.5f; \n"
|
||||
" else if (vertexId == 3) f.Tex.z = 1.f; \n"
|
||||
" } \n"
|
||||
" else if (g_UseCase >= 2) { \n"
|
||||
" f.Tex.xy = f.Tex.xy * 2.f - 1.f; \n"
|
||||
" } \n"
|
||||
" return f;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"float4 PS( Fragment f ) : SV_Target\n"
|
||||
"{\n"
|
||||
" if (g_UseCase == 0) return g_Texture2D.Sample( samLinear, f.Tex.xy ); "
|
||||
"\n"
|
||||
" else if (g_UseCase == 1) return g_Texture3D.Sample( samLinear, f.Tex "
|
||||
"); \n"
|
||||
" else if (g_UseCase == 2) return g_TextureCube.Sample( samLinear, "
|
||||
"float3(f.Tex.xy, 1.0) ); \n"
|
||||
" else if (g_UseCase == 3) return g_TextureCube.Sample( samLinear, "
|
||||
"float3(f.Tex.xy, -1.0) ); \n"
|
||||
" else if (g_UseCase == 4) return g_TextureCube.Sample( samLinear, "
|
||||
"float3(1.0, f.Tex.xy) ); \n"
|
||||
" else if (g_UseCase == 5) return g_TextureCube.Sample( samLinear, "
|
||||
"float3(-1.0, f.Tex.xy) ); \n"
|
||||
" else if (g_UseCase == 6) return g_TextureCube.Sample( samLinear, "
|
||||
"float3(f.Tex.x, 1.0, f.Tex.y) ); \n"
|
||||
" else if (g_UseCase == 7) return g_TextureCube.Sample( samLinear, "
|
||||
"float3(f.Tex.x, -1.0, f.Tex.y) ); \n"
|
||||
" else return float4(f.Tex, 1);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"technique10 Render\n"
|
||||
"{\n"
|
||||
" pass P0\n"
|
||||
" {\n"
|
||||
" SetVertexShader( CompileShader( vs_4_0, VS() ) );\n"
|
||||
" SetGeometryShader( NULL );\n"
|
||||
" SetPixelShader( CompileShader( ps_4_0, PS() ) );\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"\n";
|
||||
|
||||
// testing/tracing function used pervasively in tests. if the condition is
|
||||
// unsatisfied
|
||||
// then spew and fail the function immediately (doing no cleanup)
|
||||
#define AssertOrQuit(x) \
|
||||
if (!(x)) { \
|
||||
fprintf(stdout, "Assert unsatisfied in %s at %s:%d\n", __FUNCTION__, \
|
||||
__FILE__, __LINE__); \
|
||||
return 1; \
|
||||
}
|
||||
|
||||
bool g_bDone = false;
|
||||
bool g_bPassed = true;
|
||||
|
||||
const unsigned int g_WindowWidth = 720;
|
||||
const unsigned int g_WindowHeight = 720;
|
||||
|
||||
int g_iFrameToCompare = 10;
|
||||
|
||||
int *pArgc = NULL;
|
||||
char **pArgv = NULL;
|
||||
|
||||
// Data structure for 2D texture shared between DX10 and CUDA
|
||||
struct {
|
||||
ID3D10Texture2D *pTexture;
|
||||
ID3D10ShaderResourceView *pSRView;
|
||||
cudaGraphicsResource *cudaResource;
|
||||
void *cudaLinearMemory;
|
||||
size_t pitch;
|
||||
int width;
|
||||
int height;
|
||||
} g_texture_2d;
|
||||
|
||||
// Data structure for volume textures shared between DX10 and CUDA
|
||||
struct {
|
||||
ID3D10Texture3D *pTexture;
|
||||
ID3D10ShaderResourceView *pSRView;
|
||||
cudaGraphicsResource *cudaResource;
|
||||
void *cudaLinearMemory;
|
||||
size_t pitch;
|
||||
int width;
|
||||
int height;
|
||||
int depth;
|
||||
} g_texture_3d;
|
||||
|
||||
// Data structure for cube texture shared between DX10 and CUDA
|
||||
struct {
|
||||
ID3D10Texture2D *pTexture;
|
||||
ID3D10ShaderResourceView *pSRView;
|
||||
cudaGraphicsResource *cudaResource;
|
||||
void *cudaLinearMemory;
|
||||
size_t pitch;
|
||||
int size;
|
||||
} g_texture_cube;
|
||||
|
||||
// The CUDA kernel launchers that get called
|
||||
extern "C" {
|
||||
bool cuda_texture_2d(void *surface, size_t width, size_t height, size_t pitch,
|
||||
float t);
|
||||
bool cuda_texture_3d(void *surface, int width, int height, int depth,
|
||||
size_t pitch, size_t pitchslice, float t);
|
||||
bool cuda_texture_cube(void *surface, int width, int height, size_t pitch,
|
||||
int face, float t);
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Forward declarations
|
||||
//-----------------------------------------------------------------------------
|
||||
HRESULT InitD3D(HWND hWnd);
|
||||
HRESULT InitTextures();
|
||||
|
||||
void RunKernels();
|
||||
void DrawScene();
|
||||
void Cleanup();
|
||||
void Render();
|
||||
|
||||
LRESULT WINAPI MsgProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam);
|
||||
|
||||
#define NAME_LEN 512
|
||||
|
||||
bool findCUDADevice() {
|
||||
int nGraphicsGPU = 0;
|
||||
int deviceCount = 0;
|
||||
bool bFoundGraphics = false;
|
||||
char devname[NAME_LEN];
|
||||
|
||||
// This function call returns 0 if there are no CUDA capable devices.
|
||||
cudaError_t error_id = cudaGetDeviceCount(&deviceCount);
|
||||
|
||||
if (error_id != cudaSuccess) {
|
||||
printf("cudaGetDeviceCount returned %d\n-> %s\n", (int)error_id,
|
||||
cudaGetErrorString(error_id));
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
if (deviceCount == 0) {
|
||||
printf("> There are no device(s) supporting CUDA\n");
|
||||
return false;
|
||||
} else {
|
||||
printf("> Found %d CUDA Capable Device(s)\n", deviceCount);
|
||||
}
|
||||
|
||||
// Get CUDA device properties
|
||||
cudaDeviceProp deviceProp;
|
||||
|
||||
for (int dev = 0; dev < deviceCount; ++dev) {
|
||||
cudaGetDeviceProperties(&deviceProp, dev);
|
||||
STRCPY(devname, NAME_LEN, deviceProp.name);
|
||||
printf("> GPU %d: %s\n", dev, devname);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool findDXDevice(char *dev_name) {
|
||||
HRESULT hr = S_OK;
|
||||
cudaError cuStatus;
|
||||
|
||||
// Iterate through the candidate adapters
|
||||
IDXGIFactory *pFactory;
|
||||
hr = sFnPtr_CreateDXGIFactory(__uuidof(IDXGIFactory), (void **)(&pFactory));
|
||||
|
||||
if (!SUCCEEDED(hr)) {
|
||||
printf("> No DXGI Factory created.\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
UINT adapter = 0;
|
||||
|
||||
for (; !g_pCudaCapableAdapter; ++adapter) {
|
||||
// Get a candidate DXGI adapter
|
||||
IDXGIAdapter *pAdapter = NULL;
|
||||
hr = pFactory->EnumAdapters(adapter, &pAdapter);
|
||||
|
||||
if (FAILED(hr)) {
|
||||
break; // no compatible adapters found
|
||||
}
|
||||
|
||||
// Query to see if there exists a corresponding compute device
|
||||
int cuDevice;
|
||||
cuStatus = cudaD3D10GetDevice(&cuDevice, pAdapter);
|
||||
printLastCudaError("cudaD3D10GetDevice failed"); // This prints and resets
|
||||
// the cudaError to
|
||||
// cudaSuccess
|
||||
|
||||
if (cudaSuccess == cuStatus) {
|
||||
// If so, mark it as the one against which to create our d3d10 device
|
||||
g_pCudaCapableAdapter = pAdapter;
|
||||
g_pCudaCapableAdapter->AddRef();
|
||||
}
|
||||
|
||||
pAdapter->Release();
|
||||
}
|
||||
|
||||
printf("> Found %d D3D10 Adapater(s).\n", (int)adapter);
|
||||
|
||||
pFactory->Release();
|
||||
|
||||
if (!g_pCudaCapableAdapter) {
|
||||
printf("> Found 0 D3D10 Adapater(s) /w Compute capability.\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
DXGI_ADAPTER_DESC adapterDesc;
|
||||
g_pCudaCapableAdapter->GetDesc(&adapterDesc);
|
||||
wcstombs_s(NULL, dev_name, 256, adapterDesc.Description, 128);
|
||||
|
||||
printf("> Found 1 D3D10 Adapater(s) /w Compute capability.\n");
|
||||
printf("> %s\n", dev_name);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Program main
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
int main(int argc, char *argv[]) {
|
||||
char device_name[256];
|
||||
char *ref_file = NULL;
|
||||
|
||||
pArgc = &argc;
|
||||
pArgv = argv;
|
||||
|
||||
printf("[%s] - Starting...\n", SDK_name);
|
||||
|
||||
if (!findCUDADevice()) // Search for CUDA GPU
|
||||
{
|
||||
printf("> CUDA Device NOT found on \"%s\".. Exiting.\n", device_name);
|
||||
exit(EXIT_SUCCESS);
|
||||
}
|
||||
|
||||
// Search for D3D API (locate drivers, does not mean device is found)
|
||||
if (!dynlinkLoadD3D10API()) {
|
||||
printf("> D3D10 API libraries NOT found on.. Exiting.\n");
|
||||
dynlinkUnloadD3D10API();
|
||||
exit(EXIT_SUCCESS);
|
||||
}
|
||||
|
||||
if (!findDXDevice(device_name)) { // Search for D3D Hardware Device
|
||||
printf("> D3D10 Graphics Device NOT found.. Exiting.\n");
|
||||
dynlinkUnloadD3D10API();
|
||||
exit(EXIT_SUCCESS);
|
||||
}
|
||||
|
||||
// command line options
|
||||
if (argc > 1) {
|
||||
// automatied build testing harness
|
||||
if (checkCmdLineFlag(argc, (const char **)argv, "file"))
|
||||
getCmdLineArgumentString(argc, (const char **)argv, "file", &ref_file);
|
||||
}
|
||||
|
||||
//
|
||||
// create window
|
||||
//
|
||||
// Register the window class
|
||||
#if 1
|
||||
WNDCLASSEX wc = {sizeof(WNDCLASSEX),
|
||||
CS_CLASSDC,
|
||||
MsgProc,
|
||||
0L,
|
||||
0L,
|
||||
GetModuleHandle(NULL),
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
"CUDA SDK",
|
||||
NULL};
|
||||
RegisterClassEx(&wc);
|
||||
|
||||
// Create the application's window
|
||||
int xBorder = ::GetSystemMetrics(SM_CXSIZEFRAME);
|
||||
int yMenu = ::GetSystemMetrics(SM_CYMENU);
|
||||
int yBorder = ::GetSystemMetrics(SM_CYSIZEFRAME);
|
||||
HWND hWnd = CreateWindow(
|
||||
wc.lpszClassName, "CUDA/D3D10 Texture InterOP", WS_OVERLAPPEDWINDOW, 0, 0,
|
||||
g_WindowWidth + 2 * xBorder, g_WindowHeight + 2 * yBorder + yMenu, NULL,
|
||||
NULL, wc.hInstance, NULL);
|
||||
#else
|
||||
static WNDCLASSEX wc = {
|
||||
sizeof(WNDCLASSEX), CS_CLASSDC, MsgProc, 0L, 0L,
|
||||
GetModuleHandle(NULL), NULL, NULL, NULL, NULL,
|
||||
"CudaD3D9Tex", NULL};
|
||||
RegisterClassEx(&wc);
|
||||
HWND hWnd = CreateWindow("CudaD3D9Tex", "CUDA D3D9 Texture Interop",
|
||||
WS_OVERLAPPEDWINDOW, 0, 0, 800, 320,
|
||||
GetDesktopWindow(), NULL, wc.hInstance, NULL);
|
||||
#endif
|
||||
|
||||
ShowWindow(hWnd, SW_SHOWDEFAULT);
|
||||
UpdateWindow(hWnd);
|
||||
|
||||
// Initialize Direct3D
|
||||
if (SUCCEEDED(InitD3D(hWnd)) && SUCCEEDED(InitTextures())) {
|
||||
// 2D
|
||||
// register the Direct3D resources that we'll use
|
||||
// we'll read to and write from g_texture_2d, so don't set any special map
|
||||
// flags for it
|
||||
cudaGraphicsD3D10RegisterResource(&g_texture_2d.cudaResource,
|
||||
g_texture_2d.pTexture,
|
||||
cudaGraphicsRegisterFlagsNone);
|
||||
getLastCudaError("cudaGraphicsD3D10RegisterResource (g_texture_2d) failed");
|
||||
// cuda cannot write into the texture directly : the texture is seen as a
|
||||
// cudaArray and can only be mapped as a texture
|
||||
// Create a buffer so that cuda can write into it
|
||||
// pixel fmt is DXGI_FORMAT_R32G32B32A32_FLOAT
|
||||
cudaMallocPitch(&g_texture_2d.cudaLinearMemory, &g_texture_2d.pitch,
|
||||
g_texture_2d.width * sizeof(float) * 4,
|
||||
g_texture_2d.height);
|
||||
getLastCudaError("cudaMallocPitch (g_texture_2d) failed");
|
||||
cudaMemset(g_texture_2d.cudaLinearMemory, 1,
|
||||
g_texture_2d.pitch * g_texture_2d.height);
|
||||
|
||||
// CUBE
|
||||
cudaGraphicsD3D10RegisterResource(&g_texture_cube.cudaResource,
|
||||
g_texture_cube.pTexture,
|
||||
cudaGraphicsRegisterFlagsNone);
|
||||
getLastCudaError(
|
||||
"cudaGraphicsD3D10RegisterResource (g_texture_cube) failed");
|
||||
// create the buffer. pixel fmt is DXGI_FORMAT_R8G8B8A8_SNORM
|
||||
cudaMallocPitch(&g_texture_cube.cudaLinearMemory, &g_texture_cube.pitch,
|
||||
g_texture_cube.size * 4, g_texture_cube.size);
|
||||
getLastCudaError("cudaMallocPitch (g_texture_cube) failed");
|
||||
cudaMemset(g_texture_cube.cudaLinearMemory, 1,
|
||||
g_texture_cube.pitch * g_texture_cube.size);
|
||||
getLastCudaError("cudaMemset (g_texture_cube) failed");
|
||||
|
||||
// 3D
|
||||
cudaGraphicsD3D10RegisterResource(&g_texture_3d.cudaResource,
|
||||
g_texture_3d.pTexture,
|
||||
cudaGraphicsRegisterFlagsNone);
|
||||
getLastCudaError("cudaGraphicsD3D10RegisterResource (g_texture_3d) failed");
|
||||
// create the buffer. pixel fmt is DXGI_FORMAT_R8G8B8A8_SNORM
|
||||
// cudaMallocPitch(&g_texture_3d.cudaLinearMemory, &g_texture_3d.pitch,
|
||||
// g_texture_3d.width * 4, g_texture_3d.height * g_texture_3d.depth);
|
||||
cudaMalloc(
|
||||
&g_texture_3d.cudaLinearMemory,
|
||||
g_texture_3d.width * 4 * g_texture_3d.height * g_texture_3d.depth);
|
||||
g_texture_3d.pitch = g_texture_3d.width * 4;
|
||||
getLastCudaError("cudaMallocPitch (g_texture_3d) failed");
|
||||
cudaMemset(g_texture_3d.cudaLinearMemory, 1,
|
||||
g_texture_3d.pitch * g_texture_3d.height * g_texture_3d.depth);
|
||||
getLastCudaError("cudaMemset (g_texture_3d) failed");
|
||||
} else {
|
||||
printf("> WARNING: No D3D10 Device found.\n");
|
||||
g_bPassed = true;
|
||||
exit(g_bPassed ? EXIT_SUCCESS : EXIT_FAILURE);
|
||||
}
|
||||
|
||||
//
|
||||
// the main loop
|
||||
//
|
||||
while (false == g_bDone) {
|
||||
Render();
|
||||
|
||||
//
|
||||
// handle I/O
|
||||
//
|
||||
MSG msg;
|
||||
ZeroMemory(&msg, sizeof(msg));
|
||||
|
||||
while (msg.message != WM_QUIT) {
|
||||
if (PeekMessage(&msg, NULL, 0U, 0U, PM_REMOVE)) {
|
||||
TranslateMessage(&msg);
|
||||
DispatchMessage(&msg);
|
||||
} else {
|
||||
Render();
|
||||
|
||||
if (ref_file) {
|
||||
for (int count = 0; count < g_iFrameToCompare; count++) {
|
||||
Render();
|
||||
}
|
||||
|
||||
const char *cur_image_path = "simpleD3D10Texture.ppm";
|
||||
|
||||
// Save a reference of our current test run image
|
||||
CheckRenderD3D10::ActiveRenderTargetToPPM(g_pd3dDevice,
|
||||
cur_image_path);
|
||||
|
||||
// compare to offical reference image, printing PASS or FAIL.
|
||||
g_bPassed = CheckRenderD3D10::PPMvsPPM(cur_image_path, ref_file,
|
||||
argv[0], MAX_EPSILON, 0.15f);
|
||||
|
||||
g_bDone = true;
|
||||
|
||||
Cleanup();
|
||||
|
||||
PostQuitMessage(0);
|
||||
} else {
|
||||
g_bPassed = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Release D3D Library (after message loop)
|
||||
dynlinkUnloadD3D10API();
|
||||
|
||||
// Unregister windows class
|
||||
UnregisterClass(wc.lpszClassName, wc.hInstance);
|
||||
|
||||
//
|
||||
// and exit
|
||||
//
|
||||
printf("> %s running on %s exiting...\n", SDK_name, device_name);
|
||||
|
||||
exit(g_bPassed ? EXIT_SUCCESS : EXIT_FAILURE);
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Name: InitD3D()
|
||||
// Desc: Initializes Direct3D
|
||||
//-----------------------------------------------------------------------------
|
||||
HRESULT InitD3D(HWND hWnd) {
|
||||
// Set up the structure used to create the device and swapchain
|
||||
DXGI_SWAP_CHAIN_DESC sd;
|
||||
ZeroMemory(&sd, sizeof(sd));
|
||||
sd.BufferCount = 1;
|
||||
sd.BufferDesc.Width = g_WindowWidth;
|
||||
sd.BufferDesc.Height = g_WindowHeight;
|
||||
sd.BufferDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
|
||||
sd.BufferDesc.RefreshRate.Numerator = 60;
|
||||
sd.BufferDesc.RefreshRate.Denominator = 1;
|
||||
sd.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT;
|
||||
sd.OutputWindow = hWnd;
|
||||
sd.SampleDesc.Count = 1;
|
||||
sd.SampleDesc.Quality = 0;
|
||||
sd.Windowed = TRUE;
|
||||
|
||||
// Create device and swapchain
|
||||
HRESULT hr = sFnPtr_D3D10CreateDeviceAndSwapChain(
|
||||
g_pCudaCapableAdapter, D3D10_DRIVER_TYPE_HARDWARE, NULL, 0,
|
||||
D3D10_SDK_VERSION, &sd, &g_pSwapChain, &g_pd3dDevice);
|
||||
AssertOrQuit(SUCCEEDED(hr));
|
||||
g_pCudaCapableAdapter->Release();
|
||||
|
||||
// Create a render target view of the swapchain
|
||||
ID3D10Texture2D *pBuffer;
|
||||
hr =
|
||||
g_pSwapChain->GetBuffer(0, __uuidof(ID3D10Texture2D), (LPVOID *)&pBuffer);
|
||||
AssertOrQuit(SUCCEEDED(hr));
|
||||
|
||||
hr = g_pd3dDevice->CreateRenderTargetView(pBuffer, NULL, &g_pSwapChainRTV);
|
||||
AssertOrQuit(SUCCEEDED(hr));
|
||||
pBuffer->Release();
|
||||
|
||||
g_pd3dDevice->OMSetRenderTargets(1, &g_pSwapChainRTV, NULL);
|
||||
|
||||
// Setup the viewport
|
||||
D3D10_VIEWPORT vp;
|
||||
vp.Width = g_WindowWidth;
|
||||
vp.Height = g_WindowHeight;
|
||||
vp.MinDepth = 0.0f;
|
||||
vp.MaxDepth = 1.0f;
|
||||
vp.TopLeftX = 0;
|
||||
vp.TopLeftY = 0;
|
||||
g_pd3dDevice->RSSetViewports(1, &vp);
|
||||
|
||||
// Setup the effect
|
||||
{
|
||||
ID3D10Blob *pCompiledEffect;
|
||||
ID3D10Blob *pErrors = NULL;
|
||||
hr = sFnPtr_D3D10CompileEffectFromMemory((void *)g_simpleEffectSrc,
|
||||
sizeof(g_simpleEffectSrc), NULL,
|
||||
NULL, // pDefines
|
||||
NULL, // pIncludes
|
||||
0, // HLSL flags
|
||||
0, // FXFlags
|
||||
&pCompiledEffect, &pErrors);
|
||||
|
||||
if (pErrors) {
|
||||
LPVOID l_pError = NULL;
|
||||
l_pError = pErrors->GetBufferPointer(); // then cast to a char* to see it
|
||||
// in the locals window
|
||||
fprintf(stdout, "Compilation error: \n %s", (char *)l_pError);
|
||||
}
|
||||
|
||||
AssertOrQuit(SUCCEEDED(hr));
|
||||
|
||||
hr = sFnPtr_D3D10CreateEffectFromMemory(
|
||||
pCompiledEffect->GetBufferPointer(), pCompiledEffect->GetBufferSize(),
|
||||
0, // FXFlags
|
||||
g_pd3dDevice, NULL, &g_pSimpleEffect);
|
||||
pCompiledEffect->Release();
|
||||
|
||||
g_pSimpleTechnique = g_pSimpleEffect->GetTechniqueByName("Render");
|
||||
|
||||
g_pvQuadRect =
|
||||
g_pSimpleEffect->GetVariableByName("g_vQuadRect")->AsVector();
|
||||
g_pUseCase = g_pSimpleEffect->GetVariableByName("g_UseCase")->AsScalar();
|
||||
|
||||
g_pTexture2D =
|
||||
g_pSimpleEffect->GetVariableByName("g_Texture2D")->AsShaderResource();
|
||||
g_pTexture3D =
|
||||
g_pSimpleEffect->GetVariableByName("g_Texture3D")->AsShaderResource();
|
||||
g_pTextureCube =
|
||||
g_pSimpleEffect->GetVariableByName("g_TextureCube")->AsShaderResource();
|
||||
|
||||
// Setup no Input Layout
|
||||
g_pd3dDevice->IASetInputLayout(0);
|
||||
g_pd3dDevice->IASetPrimitiveTopology(
|
||||
D3D10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
|
||||
}
|
||||
|
||||
D3D10_RASTERIZER_DESC rasterizerState;
|
||||
rasterizerState.FillMode = D3D10_FILL_SOLID;
|
||||
rasterizerState.CullMode = D3D10_CULL_FRONT;
|
||||
rasterizerState.FrontCounterClockwise = false;
|
||||
rasterizerState.DepthBias = false;
|
||||
rasterizerState.DepthBiasClamp = 0;
|
||||
rasterizerState.SlopeScaledDepthBias = 0;
|
||||
rasterizerState.DepthClipEnable = false;
|
||||
rasterizerState.ScissorEnable = false;
|
||||
rasterizerState.MultisampleEnable = false;
|
||||
rasterizerState.AntialiasedLineEnable = false;
|
||||
g_pd3dDevice->CreateRasterizerState(&rasterizerState, &g_pRasterState);
|
||||
g_pd3dDevice->RSSetState(g_pRasterState);
|
||||
|
||||
return S_OK;
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Name: InitTextures()
|
||||
// Desc: Initializes Direct3D Textures (allocation and initialization)
|
||||
//-----------------------------------------------------------------------------
|
||||
HRESULT InitTextures() {
|
||||
//
|
||||
// create the D3D resources we'll be using
|
||||
//
|
||||
// 2D texture
|
||||
{
|
||||
g_texture_2d.width = 256;
|
||||
g_texture_2d.height = 256;
|
||||
|
||||
D3D10_TEXTURE2D_DESC desc;
|
||||
ZeroMemory(&desc, sizeof(D3D10_TEXTURE2D_DESC));
|
||||
desc.Width = g_texture_2d.width;
|
||||
desc.Height = g_texture_2d.height;
|
||||
desc.MipLevels = 1;
|
||||
desc.ArraySize = 1;
|
||||
desc.Format = DXGI_FORMAT_R32G32B32A32_FLOAT;
|
||||
desc.SampleDesc.Count = 1;
|
||||
desc.Usage = D3D10_USAGE_DEFAULT;
|
||||
desc.BindFlags = D3D10_BIND_SHADER_RESOURCE;
|
||||
|
||||
if (FAILED(
|
||||
g_pd3dDevice->CreateTexture2D(&desc, NULL, &g_texture_2d.pTexture)))
|
||||
return E_FAIL;
|
||||
|
||||
if (FAILED(g_pd3dDevice->CreateShaderResourceView(
|
||||
g_texture_2d.pTexture, NULL, &g_texture_2d.pSRView)))
|
||||
return E_FAIL;
|
||||
|
||||
g_pTexture2D->SetResource(g_texture_2d.pSRView);
|
||||
}
|
||||
|
||||
// 3D texture
|
||||
{
|
||||
g_texture_3d.width = 64;
|
||||
g_texture_3d.height = 64;
|
||||
g_texture_3d.depth = 64;
|
||||
|
||||
D3D10_TEXTURE3D_DESC desc;
|
||||
ZeroMemory(&desc, sizeof(D3D10_TEXTURE3D_DESC));
|
||||
desc.Width = g_texture_3d.width;
|
||||
desc.Height = g_texture_3d.height;
|
||||
desc.Depth = g_texture_3d.depth;
|
||||
desc.MipLevels = 1;
|
||||
desc.Format = DXGI_FORMAT_R8G8B8A8_SNORM;
|
||||
desc.Usage = D3D10_USAGE_DEFAULT;
|
||||
desc.BindFlags = D3D10_BIND_SHADER_RESOURCE;
|
||||
|
||||
if (FAILED(
|
||||
g_pd3dDevice->CreateTexture3D(&desc, NULL, &g_texture_3d.pTexture)))
|
||||
return E_FAIL;
|
||||
|
||||
if (FAILED(g_pd3dDevice->CreateShaderResourceView(
|
||||
g_texture_3d.pTexture, NULL, &g_texture_3d.pSRView)))
|
||||
return E_FAIL;
|
||||
|
||||
g_pTexture3D->SetResource(g_texture_3d.pSRView);
|
||||
}
|
||||
|
||||
// cube texture
|
||||
{
|
||||
g_texture_cube.size = 64;
|
||||
|
||||
D3D10_TEXTURE2D_DESC desc;
|
||||
ZeroMemory(&desc, sizeof(D3D10_TEXTURE2D_DESC));
|
||||
desc.Width = g_texture_cube.size;
|
||||
desc.Height = g_texture_cube.size;
|
||||
desc.MipLevels = 1;
|
||||
desc.ArraySize = 6;
|
||||
desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
|
||||
desc.SampleDesc.Count = 1;
|
||||
desc.Usage = D3D10_USAGE_DEFAULT;
|
||||
desc.BindFlags = D3D10_BIND_SHADER_RESOURCE;
|
||||
desc.MiscFlags = D3D10_RESOURCE_MISC_TEXTURECUBE;
|
||||
|
||||
if (FAILED(g_pd3dDevice->CreateTexture2D(&desc, NULL,
|
||||
&g_texture_cube.pTexture)))
|
||||
return E_FAIL;
|
||||
|
||||
D3D10_SHADER_RESOURCE_VIEW_DESC SRVDesc;
|
||||
ZeroMemory(&SRVDesc, sizeof(SRVDesc));
|
||||
SRVDesc.Format = desc.Format;
|
||||
SRVDesc.ViewDimension = D3D10_SRV_DIMENSION_TEXTURECUBE;
|
||||
SRVDesc.TextureCube.MipLevels = desc.MipLevels;
|
||||
SRVDesc.TextureCube.MostDetailedMip = 0;
|
||||
|
||||
if (FAILED(g_pd3dDevice->CreateShaderResourceView(
|
||||
g_texture_cube.pTexture, &SRVDesc, &g_texture_cube.pSRView)))
|
||||
return E_FAIL;
|
||||
|
||||
g_pTextureCube->SetResource(g_texture_cube.pSRView);
|
||||
}
|
||||
|
||||
return S_OK;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Run the Cuda part of the computation
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
void RunKernels() {
|
||||
static float t = 0.0f;
|
||||
|
||||
// populate the 2d texture
|
||||
{
|
||||
cudaArray *cuArray;
|
||||
cudaGraphicsSubResourceGetMappedArray(&cuArray, g_texture_2d.cudaResource,
|
||||
0, 0);
|
||||
getLastCudaError(
|
||||
"cudaGraphicsSubResourceGetMappedArray (cuda_texture_2d) failed");
|
||||
|
||||
// kick off the kernel and send the staging buffer cudaLinearMemory as an
|
||||
// argument to allow the kernel to write to it
|
||||
cuda_texture_2d(g_texture_2d.cudaLinearMemory, g_texture_2d.width,
|
||||
g_texture_2d.height, g_texture_2d.pitch, t);
|
||||
getLastCudaError("cuda_texture_2d failed");
|
||||
|
||||
// then we want to copy cudaLinearMemory to the D3D texture, via its mapped
|
||||
// form : cudaArray
|
||||
cudaMemcpy2DToArray(
|
||||
cuArray, // dst array
|
||||
0, 0, // offset
|
||||
g_texture_2d.cudaLinearMemory, g_texture_2d.pitch, // src
|
||||
g_texture_2d.width * 4 * sizeof(float), g_texture_2d.height, // extent
|
||||
cudaMemcpyDeviceToDevice); // kind
|
||||
getLastCudaError("cudaMemcpy2DToArray failed");
|
||||
}
|
||||
// populate the volume texture
|
||||
{
|
||||
size_t pitchSlice = g_texture_3d.pitch * g_texture_3d.height;
|
||||
cudaArray *cuArray;
|
||||
cudaGraphicsSubResourceGetMappedArray(&cuArray, g_texture_3d.cudaResource,
|
||||
0, 0);
|
||||
getLastCudaError(
|
||||
"cudaGraphicsSubResourceGetMappedArray (cuda_texture_3d) failed");
|
||||
|
||||
// kick off the kernel and send the staging buffer cudaLinearMemory as an
|
||||
// argument to allow the kernel to write to it
|
||||
cuda_texture_3d(g_texture_3d.cudaLinearMemory, g_texture_3d.width,
|
||||
g_texture_3d.height, g_texture_3d.depth, g_texture_3d.pitch,
|
||||
pitchSlice, t);
|
||||
getLastCudaError("cuda_texture_3d failed");
|
||||
|
||||
// then we want to copy cudaLinearMemory to the D3D texture, via its mapped
|
||||
// form : cudaArray
|
||||
struct cudaMemcpy3DParms memcpyParams = {0};
|
||||
memcpyParams.dstArray = cuArray;
|
||||
memcpyParams.srcPtr.ptr = g_texture_3d.cudaLinearMemory;
|
||||
memcpyParams.srcPtr.pitch = g_texture_3d.pitch;
|
||||
memcpyParams.srcPtr.xsize = g_texture_3d.width;
|
||||
memcpyParams.srcPtr.ysize = g_texture_3d.height;
|
||||
memcpyParams.extent.width = g_texture_3d.width;
|
||||
memcpyParams.extent.height = g_texture_3d.height;
|
||||
memcpyParams.extent.depth = g_texture_3d.depth;
|
||||
memcpyParams.kind = cudaMemcpyDeviceToDevice;
|
||||
cudaMemcpy3D(&memcpyParams);
|
||||
getLastCudaError("cudaMemcpy3D failed");
|
||||
}
|
||||
|
||||
// populate the faces of the cube map
|
||||
for (int face = 0; face < 6; ++face) {
|
||||
cudaArray *cuArray;
|
||||
cudaGraphicsSubResourceGetMappedArray(&cuArray, g_texture_cube.cudaResource,
|
||||
face, 0);
|
||||
getLastCudaError(
|
||||
"cudaGraphicsSubResourceGetMappedArray (cuda_texture_cube) failed");
|
||||
|
||||
// kick off the kernel and send the staging buffer cudaLinearMemory as an
|
||||
// argument to allow the kernel to write to it
|
||||
cuda_texture_cube(g_texture_cube.cudaLinearMemory, g_texture_cube.size,
|
||||
g_texture_cube.size, g_texture_cube.pitch, face, t);
|
||||
getLastCudaError("cuda_texture_cube failed");
|
||||
|
||||
// then we want to copy cudaLinearMemory to the D3D texture, via its mapped
|
||||
// form : cudaArray
|
||||
cudaMemcpy2DToArray(cuArray, // dst array
|
||||
0, 0, // offset
|
||||
g_texture_cube.cudaLinearMemory,
|
||||
g_texture_cube.pitch, // src
|
||||
g_texture_cube.size * 4, g_texture_cube.size, // extent
|
||||
cudaMemcpyDeviceToDevice); // kind
|
||||
getLastCudaError("cudaMemcpy2DToArray failed");
|
||||
}
|
||||
|
||||
t += 0.01f;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Draw the final result on the screen
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
void DrawScene() {
|
||||
// Clear the backbuffer to a black color
|
||||
float ClearColor[4] = {0.5f, 0.5f, 0.6f, 1.0f};
|
||||
g_pd3dDevice->ClearRenderTargetView(g_pSwapChainRTV, ClearColor);
|
||||
|
||||
//
|
||||
// draw the 2d texture
|
||||
//
|
||||
g_pUseCase->SetInt(0);
|
||||
float quadRect[4] = {-0.9f, -0.9f, 0.7f, 0.7f};
|
||||
g_pvQuadRect->SetFloatVector((float *)&quadRect);
|
||||
g_pSimpleTechnique->GetPassByIndex(0)->Apply(0);
|
||||
g_pd3dDevice->Draw(4, 0);
|
||||
|
||||
//
|
||||
// draw a slice the 3d texture
|
||||
//
|
||||
g_pUseCase->SetInt(1);
|
||||
quadRect[1] = 0.1f;
|
||||
g_pvQuadRect->SetFloatVector((float *)&quadRect);
|
||||
g_pSimpleTechnique->GetPassByIndex(0)->Apply(0);
|
||||
g_pd3dDevice->Draw(4, 0);
|
||||
|
||||
//
|
||||
// draw the 6 faces of the cube texture
|
||||
//
|
||||
float faceRect[4] = {-0.1f, -0.9f, 0.5f, 0.5f};
|
||||
|
||||
for (int f = 0; f < 6; f++) {
|
||||
if (f == 3) {
|
||||
faceRect[0] += 0.55f;
|
||||
faceRect[1] = -0.9f;
|
||||
}
|
||||
|
||||
g_pUseCase->SetInt(2 + f);
|
||||
g_pvQuadRect->SetFloatVector((float *)&faceRect);
|
||||
g_pSimpleTechnique->GetPassByIndex(0)->Apply(0);
|
||||
g_pd3dDevice->Draw(4, 0);
|
||||
faceRect[1] += 0.6f;
|
||||
}
|
||||
|
||||
// Present the backbuffer contents to the display
|
||||
g_pSwapChain->Present(0, 0);
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Name: Cleanup()
|
||||
// Desc: Releases all previously initialized objects
|
||||
//-----------------------------------------------------------------------------
|
||||
void Cleanup() {
|
||||
// unregister the Cuda resources
|
||||
cudaGraphicsUnregisterResource(g_texture_2d.cudaResource);
|
||||
getLastCudaError("cudaGraphicsUnregisterResource (g_texture_2d) failed");
|
||||
cudaFree(g_texture_2d.cudaLinearMemory);
|
||||
getLastCudaError("cudaFree (g_texture_2d) failed");
|
||||
|
||||
cudaGraphicsUnregisterResource(g_texture_cube.cudaResource);
|
||||
getLastCudaError("cudaGraphicsUnregisterResource (g_texture_cube) failed");
|
||||
cudaFree(g_texture_cube.cudaLinearMemory);
|
||||
getLastCudaError("cudaFree (g_texture_2d) failed");
|
||||
|
||||
cudaGraphicsUnregisterResource(g_texture_3d.cudaResource);
|
||||
getLastCudaError("cudaGraphicsUnregisterResource (g_texture_3d) failed");
|
||||
cudaFree(g_texture_3d.cudaLinearMemory);
|
||||
getLastCudaError("cudaFree (g_texture_2d) failed");
|
||||
|
||||
//
|
||||
// clean up Direct3D
|
||||
//
|
||||
{
|
||||
// release the resources we created
|
||||
g_texture_2d.pSRView->Release();
|
||||
g_texture_2d.pTexture->Release();
|
||||
g_texture_cube.pSRView->Release();
|
||||
g_texture_cube.pTexture->Release();
|
||||
g_texture_3d.pSRView->Release();
|
||||
g_texture_3d.pTexture->Release();
|
||||
|
||||
if (g_pInputLayout != NULL) g_pInputLayout->Release();
|
||||
|
||||
if (g_pSimpleEffect != NULL) g_pSimpleEffect->Release();
|
||||
|
||||
if (g_pSwapChainRTV != NULL) g_pSwapChainRTV->Release();
|
||||
|
||||
if (g_pSwapChain != NULL) g_pSwapChain->Release();
|
||||
|
||||
if (g_pd3dDevice != NULL) g_pd3dDevice->Release();
|
||||
}
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Name: Render()
|
||||
// Desc: Launches the CUDA kernels to fill in the texture data
|
||||
//-----------------------------------------------------------------------------
|
||||
void Render() {
|
||||
//
|
||||
// map the resources we've registered so we can access them in Cuda
|
||||
// - it is most efficient to map and unmap all resources in a single call,
|
||||
// and to have the map/unmap calls be the boundary between using the GPU
|
||||
// for Direct3D and Cuda
|
||||
//
|
||||
static bool doit = true;
|
||||
|
||||
if (doit) {
|
||||
doit = true;
|
||||
cudaStream_t stream = 0;
|
||||
const int nbResources = 3;
|
||||
cudaGraphicsResource *ppResources[nbResources] = {
|
||||
g_texture_2d.cudaResource, g_texture_3d.cudaResource,
|
||||
g_texture_cube.cudaResource,
|
||||
};
|
||||
cudaGraphicsMapResources(nbResources, ppResources, stream);
|
||||
getLastCudaError("cudaGraphicsMapResources(3) failed");
|
||||
|
||||
//
|
||||
// run kernels which will populate the contents of those textures
|
||||
//
|
||||
RunKernels();
|
||||
|
||||
//
|
||||
// unmap the resources
|
||||
//
|
||||
cudaGraphicsUnmapResources(nbResources, ppResources, stream);
|
||||
getLastCudaError("cudaGraphicsUnmapResources(3) failed");
|
||||
}
|
||||
|
||||
//
|
||||
// draw the scene using them
|
||||
//
|
||||
DrawScene();
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Name: MsgProc()
|
||||
// Desc: The window's message handler
|
||||
//-----------------------------------------------------------------------------
|
||||
static LRESULT WINAPI MsgProc(HWND hWnd, UINT msg, WPARAM wParam,
|
||||
LPARAM lParam) {
|
||||
switch (msg) {
|
||||
case WM_KEYDOWN:
|
||||
if (wParam == VK_ESCAPE) {
|
||||
g_bDone = true;
|
||||
Cleanup();
|
||||
PostQuitMessage(0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
case WM_DESTROY:
|
||||
g_bDone = true;
|
||||
Cleanup();
|
||||
PostQuitMessage(0);
|
||||
return 0;
|
||||
|
||||
case WM_PAINT:
|
||||
ValidateRect(hWnd, NULL);
|
||||
return 0;
|
||||
}
|
||||
|
||||
return DefWindowProc(hWnd, msg, wParam, lParam);
|
||||
}
|
@ -1,78 +0,0 @@
|
||||
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#define PI 3.1415926536f
|
||||
|
||||
/*
|
||||
* Paint a 2D texture with a moving red/green hatch pattern on a
|
||||
* strobing blue background. Note that this kernel reads to and
|
||||
* writes from the texture, hence why this texture was not mapped
|
||||
* as WriteDiscard.
|
||||
*/
|
||||
__global__ void cuda_kernel_texture_2d(unsigned char *surface, int width,
|
||||
int height, size_t pitch, float t) {
|
||||
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
float *pixel;
|
||||
|
||||
// in the case where, due to quantization into grids, we have
|
||||
// more threads than pixels, skip the threads which don't
|
||||
// correspond to valid pixels
|
||||
if (x >= width || y >= height) return;
|
||||
|
||||
// get a pointer to the pixel at (x,y)
|
||||
pixel = (float *)(surface + y * pitch) + 4 * x;
|
||||
|
||||
// populate it
|
||||
float value_x = 0.5f + 0.5f * cos(t + 10.0f * ((2.0f * x) / width - 1.0f));
|
||||
float value_y = 0.5f + 0.5f * cos(t + 10.0f * ((2.0f * y) / height - 1.0f));
|
||||
pixel[0] = 0.5 * pixel[0] + 0.5 * pow(value_x, 3.0f); // red
|
||||
pixel[1] = 0.5 * pixel[1] + 0.5 * pow(value_y, 3.0f); // green
|
||||
pixel[2] = 0.5f + 0.5f * cos(t); // blue
|
||||
pixel[3] = 1; // alpha
|
||||
}
|
||||
|
||||
extern "C" void cuda_texture_2d(void *surface, int width, int height,
|
||||
size_t pitch, float t) {
|
||||
cudaError_t error = cudaSuccess;
|
||||
|
||||
dim3 Db = dim3(16, 16); // block dimensions are fixed to be 256 threads
|
||||
dim3 Dg = dim3((width + Db.x - 1) / Db.x, (height + Db.y - 1) / Db.y);
|
||||
|
||||
cuda_kernel_texture_2d<<<Dg, Db>>>((unsigned char *)surface, width, height,
|
||||
pitch, t);
|
||||
|
||||
error = cudaGetLastError();
|
||||
|
||||
if (error != cudaSuccess) {
|
||||
printf("cuda_kernel_texture_2d() failed to launch error = %d\n", error);
|
||||
}
|
||||
}
|
@ -1,76 +0,0 @@
|
||||
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
/*
|
||||
* Paint a 3D texture with a gradient in X (blue) and Z (green), and have every
|
||||
* other Z slice have full red.
|
||||
*/
|
||||
__global__ void cuda_kernel_texture_3d(unsigned char *surface, int width,
|
||||
int height, int depth, size_t pitch,
|
||||
size_t pitchSlice, float t) {
|
||||
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
|
||||
// in the case where, due to quantization into grids, we have
|
||||
// more threads than pixels, skip the threads which don't
|
||||
// correspond to valid pixels
|
||||
if (x >= width || y >= height) return;
|
||||
|
||||
// walk across the Z slices of this texture. it should be noted that
|
||||
// this is far from optimal data access.
|
||||
for (int z = 0; z < depth; ++z) {
|
||||
// get a pointer to this pixel
|
||||
unsigned char *pixel = surface + z * pitchSlice + y * pitch + 4 * x;
|
||||
pixel[0] = (unsigned char)(255.f * (0.5f + 0.5f *
|
||||
cos(t + (x * x + y * y + z * z) * 0.0001f * 3.14f))); // red
|
||||
pixel[1] = (unsigned char)(255.f * (0.5f + 0.5f *
|
||||
sin(t + (x * x + y * y + z * z) * 0.0001f * 3.14f))); // green
|
||||
pixel[2] = (unsigned char)0; // blue
|
||||
pixel[3] = 255; // alpha
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void cuda_texture_3d(void *surface, int width, int height, int depth,
|
||||
size_t pitch, size_t pitchSlice, float t) {
|
||||
cudaError_t error = cudaSuccess;
|
||||
|
||||
dim3 Db = dim3(16, 16); // block dimensions are fixed to be 256 threads
|
||||
dim3 Dg = dim3((width + Db.x - 1) / Db.x, (height + Db.y - 1) / Db.y);
|
||||
|
||||
cuda_kernel_texture_3d<<<Dg, Db>>>((unsigned char *)surface, width, height,
|
||||
depth, pitch, pitchSlice, t);
|
||||
|
||||
error = cudaGetLastError();
|
||||
|
||||
if (error != cudaSuccess) {
|
||||
printf("cuda_kernel_texture_3d() failed to launch error = %d\n", error);
|
||||
}
|
||||
}
|
@ -1,91 +0,0 @@
|
||||
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#define PI 3.1415926536f
|
||||
|
||||
/*
|
||||
* Paint a 2D surface with a moving bulls-eye pattern. The "face" parameter
|
||||
* selects
|
||||
* between 6 different colors to use. We will use a different color on each
|
||||
* face of a
|
||||
* cube map.
|
||||
*/
|
||||
__global__ void cuda_kernel_texture_cube(char *surface, int width, int height,
|
||||
size_t pitch, int face, float t) {
|
||||
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
unsigned char *pixel;
|
||||
|
||||
// in the case where, due to quantization into grids, we have
|
||||
// more threads than pixels, skip the threads which don't
|
||||
// correspond to valid pixels
|
||||
if (x >= width || y >= height) return;
|
||||
|
||||
// get a pointer to this pixel
|
||||
pixel = (unsigned char *)(surface + y * pitch) + 4 * x;
|
||||
|
||||
// populate it
|
||||
float theta_x = (2.0f * x) / width - 1.0f;
|
||||
float theta_y = (2.0f * y) / height - 1.0f;
|
||||
float theta = 2.0f * PI * sqrt(theta_x * theta_x + theta_y * theta_y);
|
||||
unsigned char value = 255 * (0.6f + 0.4f * cos(theta + t));
|
||||
|
||||
pixel[3] = 255; // alpha
|
||||
|
||||
if (face % 2) {
|
||||
pixel[0] = // blue
|
||||
pixel[1] = // green
|
||||
pixel[2] = 0.5; // red
|
||||
pixel[face / 2] = value;
|
||||
} else {
|
||||
pixel[0] = // blue
|
||||
pixel[1] = // green
|
||||
pixel[2] = value; // red
|
||||
pixel[face / 2] = 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void cuda_texture_cube(void *surface, int width, int height,
|
||||
size_t pitch, int face, float t) {
|
||||
cudaError_t error = cudaSuccess;
|
||||
|
||||
dim3 Db = dim3(16, 16); // block dimensions are fixed to be 256 threads
|
||||
dim3 Dg = dim3((width + Db.x - 1) / Db.x, (height + Db.y - 1) / Db.y);
|
||||
|
||||
cuda_kernel_texture_cube<<<Dg, Db>>>((char *)surface, width, height, pitch,
|
||||
face, t);
|
||||
|
||||
error = cudaGetLastError();
|
||||
|
||||
if (error != cudaSuccess) {
|
||||
printf("cuda_kernel_texture_cube() failed to launch error = %d\n", error);
|
||||
}
|
||||
}
|
@ -1,46 +0,0 @@
|
||||
################################################################################
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met:
|
||||
# * Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
# contributors may be used to endorse or promote products derived
|
||||
# from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
################################################################################
|
||||
#
|
||||
# Makefile project only supported on Mac OS X and Linux Platforms)
|
||||
#
|
||||
################################################################################
|
||||
|
||||
# Target rules
|
||||
all: build
|
||||
|
||||
build:
|
||||
$(info >>> WARNING - simpleD3D9 is not supported on Linux - waiving sample <<<)
|
||||
|
||||
run: build
|
||||
|
||||
testrun: build
|
||||
|
||||
clean:
|
||||
|
||||
clobber: clean
|
@ -1,49 +0,0 @@
|
||||
# simpleD3D9 - Simple Direct3D9 (Vertex Arrays)
|
||||
|
||||
## Description
|
||||
|
||||
Simple program which demonstrates interoperability between CUDA and Direct3D9. The program generates a vertex array with CUDA and uses Direct3D9 to render the geometry. A Direct3D capable device is required.
|
||||
|
||||
## Key Concepts
|
||||
|
||||
Graphics Interop
|
||||
|
||||
## Supported SM Architectures
|
||||
|
||||
[SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 8.9 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
|
||||
## Supported OSes
|
||||
|
||||
Windows
|
||||
|
||||
## Supported CPU Architecture
|
||||
|
||||
x86_64
|
||||
|
||||
## CUDA APIs involved
|
||||
|
||||
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
|
||||
cudaGraphicsUnmapResources, cudaGraphicsResourceGetMappedPointer, cudaGetLastError, cudaGraphicsMapResources, cudaGraphicsUnregisterResource
|
||||
|
||||
## Dependencies needed to build/run
|
||||
[DirectX](../../../README.md#directx)
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
|
||||
|
||||
## Build and Run
|
||||
|
||||
### Windows
|
||||
The Windows samples are built using the Visual Studio IDE. Solution files (.sln) are provided for each supported version of Visual Studio, using the format:
|
||||
```
|
||||
*_vs<version>.sln - for Visual Studio <version>
|
||||
```
|
||||
Each individual sample has its own set of solution files in its directory:
|
||||
|
||||
To build/examine all the samples at once, the complete solution files should be used. To build/examine a single sample, the individual sample solution files should be used.
|
||||
> **Note:** Some samples require that the Microsoft DirectX SDK (June 2010 or newer) be installed and that the VC++ directory paths are properly set up (**Tools > Options...**). Check DirectX Dependencies section for details."
|
||||
|
||||
## References (for more details)
|
||||
|
Binary file not shown.
Binary file not shown.
Before Width: | Height: | Size: 269 KiB |
Binary file not shown.
Before Width: | Height: | Size: 65 KiB |
Binary file not shown.
Before Width: | Height: | Size: 21 KiB |
@ -1,668 +0,0 @@
|
||||
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
// This example demonstrates how to use the CUDA Direct3D bindings to fill
|
||||
// a vertex buffer with CUDA and use Direct3D to render the data.
|
||||
// Host code.
|
||||
|
||||
#pragma warning(disable : 4312)
|
||||
|
||||
#include <Windows.h>
|
||||
#include <mmsystem.h>
|
||||
#pragma warning(disable : 4996) // disable deprecated warning
|
||||
#include <strsafe.h>
|
||||
#pragma warning(default : 4996)
|
||||
#include <cassert>
|
||||
|
||||
// includes, cuda
|
||||
#include <cuda_runtime_api.h>
|
||||
#include <cuda_d3d9_interop.h>
|
||||
|
||||
// includes, project
|
||||
#include <rendercheck_d3d9.h>
|
||||
#include <helper_functions.h> // Helper functions for other non-cuda utilities
|
||||
#include <helper_cuda.h> // CUDA Helper Functions for initialization
|
||||
#include <DirectXMath.h>
|
||||
using namespace DirectX;
|
||||
|
||||
#define MAX_EPSILON 10
|
||||
|
||||
static char *sSDKsample = "simpleD3D9";
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Global variables
|
||||
//-----------------------------------------------------------------------------
|
||||
IDirect3D9Ex *g_pD3D = NULL; // Used to create the D3DDevice
|
||||
unsigned int g_iAdapter = NULL; // Our adapter
|
||||
IDirect3DDevice9Ex *g_pD3DDevice = NULL; // Our rendering device
|
||||
IDirect3DVertexBuffer9 *g_pVB = NULL; // Buffer to hold vertices
|
||||
|
||||
struct cudaGraphicsResource *cuda_VB_resource; // handles D3D9-CUDA exchange
|
||||
|
||||
D3DDISPLAYMODEEX g_d3ddm;
|
||||
D3DPRESENT_PARAMETERS g_d3dpp;
|
||||
|
||||
bool g_bWindowed = true;
|
||||
bool g_bDeviceLost = false;
|
||||
bool g_bPassed = true;
|
||||
|
||||
// A structure for our custom vertex type
|
||||
struct CUSTOMVERTEX {
|
||||
FLOAT x, y, z; // The untransformed, 3D position for the vertex
|
||||
DWORD color; // The vertex color
|
||||
};
|
||||
|
||||
// Our custom FVF, which describes our custom vertex structure
|
||||
#define D3DFVF_CUSTOMVERTEX (D3DFVF_XYZ | D3DFVF_DIFFUSE)
|
||||
|
||||
const unsigned int g_WindowWidth = 512;
|
||||
const unsigned int g_WindowHeight = 512;
|
||||
|
||||
const unsigned int g_MeshWidth = 256;
|
||||
const unsigned int g_MeshHeight = 256;
|
||||
|
||||
const unsigned int g_NumVertices = g_MeshWidth * g_MeshHeight;
|
||||
|
||||
bool g_bQAReadback = false;
|
||||
int g_iFrameToCompare = 10;
|
||||
|
||||
int *pArgc = NULL;
|
||||
char **pArgv = NULL;
|
||||
|
||||
float anim;
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Forward declarations
|
||||
//-----------------------------------------------------------------------------
|
||||
void runTest(int argc, char **argv, char *ref_file);
|
||||
void runCuda();
|
||||
bool SaveVBResult(int argc, char **argv);
|
||||
HRESULT InitD3D9(HWND hWnd);
|
||||
HRESULT InitD3D9RenderState();
|
||||
HRESULT InitCUDA();
|
||||
HRESULT RestoreContextResources();
|
||||
HRESULT InitVertexBuffer();
|
||||
HRESULT FreeVertexBuffer();
|
||||
VOID Cleanup();
|
||||
VOID SetupMatrices();
|
||||
HRESULT Render();
|
||||
LRESULT WINAPI MsgProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam);
|
||||
|
||||
// CUDA D3D9 kernel
|
||||
extern "C" void simpleD3DKernel(float4 *pos, unsigned int width,
|
||||
unsigned int height, float time);
|
||||
|
||||
#define NAME_LEN 512
|
||||
|
||||
char device_name[NAME_LEN];
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Program main
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
int main(int argc, char **argv) {
|
||||
char *ref_file = NULL;
|
||||
|
||||
pArgc = &argc;
|
||||
pArgv = argv;
|
||||
|
||||
printf("> %s starting...\n", sSDKsample);
|
||||
|
||||
// command line options
|
||||
if (argc > 1) {
|
||||
if (checkCmdLineFlag(argc, (const char **)argv, "file")) {
|
||||
getCmdLineArgumentString(argc, (const char **)argv, "file",
|
||||
(char **)&ref_file);
|
||||
}
|
||||
}
|
||||
|
||||
runTest(argc, argv, ref_file);
|
||||
|
||||
//
|
||||
// and exit
|
||||
//
|
||||
printf("%s running on %s exiting...\n", sSDKsample, device_name);
|
||||
printf("%s sample finished returned: %s\n", sSDKsample,
|
||||
(g_bPassed ? "OK" : "ERROR!"));
|
||||
exit(g_bPassed ? EXIT_SUCCESS : EXIT_FAILURE);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Run a simple test for CUDA
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
void runTest(int argc, char **argv, char *ref_file) {
|
||||
// Register the window class
|
||||
WNDCLASSEX wc = {sizeof(WNDCLASSEX), CS_CLASSDC, MsgProc, 0L, 0L,
|
||||
GetModuleHandle(NULL), NULL, NULL, NULL, NULL,
|
||||
"CUDA/D3D9 simpleD3D9", NULL};
|
||||
RegisterClassEx(&wc);
|
||||
|
||||
// Create the application's window
|
||||
int xBorder = ::GetSystemMetrics(SM_CXSIZEFRAME);
|
||||
int yBorder = ::GetSystemMetrics(SM_CYSIZEFRAME);
|
||||
int yMenu = ::GetSystemMetrics(SM_CYMENU);
|
||||
HWND hWnd = CreateWindow(
|
||||
wc.lpszClassName, "CUDA/D3D9 simpleD3D9", WS_OVERLAPPEDWINDOW, 0, 0,
|
||||
g_WindowWidth + 2 * xBorder, g_WindowHeight + 2 * yBorder + yMenu, NULL,
|
||||
NULL, wc.hInstance, NULL);
|
||||
|
||||
// Initialize Direct3D9
|
||||
if (SUCCEEDED(InitD3D9(hWnd)) && SUCCEEDED(InitCUDA())) {
|
||||
// Create the scene geometry
|
||||
if (SUCCEEDED(InitVertexBuffer())) {
|
||||
// This is the normal case (D3D9 device is present)
|
||||
if (!g_bDeviceLost) {
|
||||
// Initialize D3D9 vertex buffer contents using CUDA kernel
|
||||
runCuda();
|
||||
|
||||
// Save result
|
||||
SaveVBResult(argc, argv);
|
||||
|
||||
// Show the window
|
||||
ShowWindow(hWnd, SW_SHOWDEFAULT);
|
||||
UpdateWindow(hWnd);
|
||||
}
|
||||
|
||||
// Enter the message loop
|
||||
MSG msg;
|
||||
ZeroMemory(&msg, sizeof(msg));
|
||||
|
||||
while (msg.message != WM_QUIT) {
|
||||
if (PeekMessage(&msg, NULL, 0U, 0U, PM_REMOVE)) {
|
||||
TranslateMessage(&msg);
|
||||
DispatchMessage(&msg);
|
||||
} else {
|
||||
Render();
|
||||
|
||||
if (ref_file != NULL) {
|
||||
for (int count = 0; count < g_iFrameToCompare; count++) {
|
||||
Render();
|
||||
}
|
||||
|
||||
const char *cur_image_path = "simpleD3D9.ppm";
|
||||
|
||||
// Save a reference of our current test run image
|
||||
CheckRenderD3D9::BackbufferToPPM(g_pD3DDevice, cur_image_path);
|
||||
|
||||
// compare to offical reference image, printing PASS or FAIL.
|
||||
g_bPassed = CheckRenderD3D9::PPMvsPPM(cur_image_path, ref_file,
|
||||
argv[0], MAX_EPSILON, 0.15f);
|
||||
|
||||
Cleanup();
|
||||
|
||||
PostQuitMessage(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
UnregisterClass(wc.lpszClassName, wc.hInstance);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Run the Cuda part of the computation
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
void runCuda() {
|
||||
HRESULT hr = S_OK;
|
||||
|
||||
// Map vertex buffer to Cuda
|
||||
float4 *d_ptr;
|
||||
|
||||
// CUDA Map call to the Vertex Buffer and return a pointer
|
||||
checkCudaErrors(cudaGraphicsMapResources(1, &cuda_VB_resource, 0));
|
||||
getLastCudaError("cudaGraphicsMapResources failed");
|
||||
// This gets a pointer from the Vertex Buffer
|
||||
size_t num_bytes;
|
||||
checkCudaErrors(cudaGraphicsResourceGetMappedPointer(
|
||||
(void **)&d_ptr, &num_bytes, cuda_VB_resource));
|
||||
getLastCudaError("cudaGraphicsResourceGetMappedPointer failed");
|
||||
|
||||
// Execute kernel
|
||||
simpleD3DKernel(d_ptr, g_MeshWidth, g_MeshHeight, anim);
|
||||
|
||||
// CUDA Map Unmap vertex buffer
|
||||
checkCudaErrors(cudaGraphicsUnmapResources(1, &cuda_VB_resource, 0));
|
||||
getLastCudaError("cudaGraphicsUnmapResource failed");
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Check if the result is correct or write data to file for external
|
||||
//! regression testing
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
bool SaveVBResult(int argc, char **argv) {
|
||||
// Lock vertex buffer
|
||||
float *data;
|
||||
|
||||
if (FAILED(g_pVB->Lock(0, 0, (void **)&data, 0))) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Save result
|
||||
if (checkCmdLineFlag(argc, (const char **)argv, "regression")) {
|
||||
// write file for regression test
|
||||
sdkWriteFile<float>("./data/regression.dat", data, sizeof(CUSTOMVERTEX),
|
||||
0.0f, false);
|
||||
}
|
||||
|
||||
// unlock
|
||||
if (FAILED(g_pVB->Unlock())) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Name: InitD3D9()
|
||||
// Desc: Initializes Direct3D9
|
||||
//-----------------------------------------------------------------------------
|
||||
HRESULT InitD3D9(HWND hWnd) {
|
||||
// Create the D3D object.
|
||||
if (S_OK != Direct3DCreate9Ex(D3D_SDK_VERSION, &g_pD3D)) {
|
||||
return E_FAIL;
|
||||
}
|
||||
|
||||
D3DADAPTER_IDENTIFIER9 adapterId;
|
||||
int device;
|
||||
bool bDeviceFound = false;
|
||||
printf("\n");
|
||||
|
||||
cudaError cuStatus;
|
||||
|
||||
for (g_iAdapter = 0; g_iAdapter < g_pD3D->GetAdapterCount(); g_iAdapter++) {
|
||||
HRESULT hr = g_pD3D->GetAdapterIdentifier(g_iAdapter, 0, &adapterId);
|
||||
|
||||
if (FAILED(hr)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
cuStatus = cudaD3D9GetDevice(&device, adapterId.DeviceName);
|
||||
// This prints and resets the cudaError to cudaSuccess
|
||||
printLastCudaError("cudaD3D9GetDevice failed");
|
||||
|
||||
printf("> Display Device #%d: \"%s\" %s Direct3D9\n", g_iAdapter,
|
||||
adapterId.Description,
|
||||
(cuStatus == cudaSuccess) ? "supports" : "does not support");
|
||||
|
||||
if (cudaSuccess == cuStatus) {
|
||||
bDeviceFound = true;
|
||||
STRCPY(device_name, NAME_LEN, adapterId.Description);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// we check to make sure we have found a cuda-compatible D3D device to work on
|
||||
if (!bDeviceFound) {
|
||||
printf("\n");
|
||||
printf(" No CUDA-compatible Direct3D9 device available\n");
|
||||
printf("PASSED\n");
|
||||
// destroy the D3D device
|
||||
g_pD3D->Release();
|
||||
exit(EXIT_SUCCESS);
|
||||
}
|
||||
|
||||
RECT rc;
|
||||
GetClientRect(hWnd, &rc);
|
||||
g_pD3D->GetAdapterDisplayModeEx(g_iAdapter, &g_d3ddm, NULL);
|
||||
|
||||
// Set up the structure used to create the D3DDevice
|
||||
ZeroMemory(&g_d3dpp, sizeof(g_d3dpp));
|
||||
g_d3dpp.Windowed = g_bWindowed;
|
||||
g_d3dpp.BackBufferCount = 1;
|
||||
g_d3dpp.hDeviceWindow = hWnd;
|
||||
g_d3dpp.SwapEffect = D3DSWAPEFFECT_DISCARD;
|
||||
g_d3dpp.BackBufferFormat = g_d3ddm.Format;
|
||||
g_d3dpp.FullScreen_RefreshRateInHz = 0; // set to 60 for fullscreen, and also
|
||||
// don't forget to set Windowed to
|
||||
// FALSE
|
||||
g_d3dpp.PresentationInterval =
|
||||
D3DPRESENT_INTERVAL_ONE; // D3DPRESENT_DONOTWAIT;
|
||||
|
||||
g_d3dpp.BackBufferWidth = g_WindowWidth;
|
||||
g_d3dpp.BackBufferHeight = g_WindowHeight;
|
||||
|
||||
// Create the D3DDevice
|
||||
if (FAILED(g_pD3D->CreateDeviceEx(g_iAdapter, D3DDEVTYPE_HAL, hWnd,
|
||||
D3DCREATE_HARDWARE_VERTEXPROCESSING,
|
||||
&g_d3dpp, NULL, &g_pD3DDevice))) {
|
||||
return E_FAIL;
|
||||
}
|
||||
|
||||
if (FAILED(InitD3D9RenderState())) {
|
||||
return E_FAIL;
|
||||
}
|
||||
|
||||
return S_OK;
|
||||
}
|
||||
|
||||
// Initialize the D3D Rendering State
|
||||
HRESULT InitD3D9RenderState() {
|
||||
// Turn off culling, so we see the front and back of the triangle
|
||||
if (FAILED(g_pD3DDevice->SetRenderState(D3DRS_CULLMODE, D3DCULL_NONE))) {
|
||||
return E_FAIL;
|
||||
}
|
||||
|
||||
// Turn off D3D lighting, since we are providing our own vertex colors
|
||||
if (FAILED(g_pD3DDevice->SetRenderState(D3DRS_LIGHTING, FALSE))) {
|
||||
return E_FAIL;
|
||||
}
|
||||
|
||||
return S_OK;
|
||||
}
|
||||
|
||||
HRESULT InitCUDA() {
|
||||
printf("InitCUDA() g_pD3DDevice = %p\n", g_pD3DDevice);
|
||||
|
||||
// Now we need to bind a CUDA context to the DX9 device
|
||||
// This is the CUDA 2.0 DX9 interface (required for Windows XP and Vista)
|
||||
cudaD3D9SetDirect3DDevice(g_pD3DDevice);
|
||||
getLastCudaError("cudaD3D9SetDirect3DDevice failed");
|
||||
|
||||
return S_OK;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! RestoreContextResourcess
|
||||
// - this function restores all of the CUDA/D3D resources and contexts
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
HRESULT RestoreContextResources() {
|
||||
// Reinitialize D3D9 resources, CUDA resources/contexts
|
||||
InitCUDA();
|
||||
InitVertexBuffer();
|
||||
InitD3D9RenderState();
|
||||
|
||||
return S_OK;
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Name: InitVertexBuffer()
|
||||
// Desc: Creates the scene geometry (Vertex Buffer)
|
||||
//-----------------------------------------------------------------------------
|
||||
HRESULT InitVertexBuffer() {
|
||||
// Create vertex buffer
|
||||
if (FAILED(g_pD3DDevice->CreateVertexBuffer(
|
||||
g_NumVertices * sizeof(CUSTOMVERTEX), 0, D3DFVF_CUSTOMVERTEX,
|
||||
D3DPOOL_DEFAULT, &g_pVB, NULL))) {
|
||||
return E_FAIL;
|
||||
}
|
||||
|
||||
// Initialize interoperability between CUDA and Direct3D9
|
||||
// Register vertex buffer with CUDA
|
||||
cudaGraphicsD3D9RegisterResource(&cuda_VB_resource, g_pVB,
|
||||
cudaD3D9RegisterFlagsNone);
|
||||
getLastCudaError("cudaGraphicsD3D9RegisterResource failed");
|
||||
|
||||
return S_OK;
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Name: FreeVertexBuffer()
|
||||
// Desc: Free's the Vertex Buffer resource
|
||||
//-----------------------------------------------------------------------------
|
||||
HRESULT FreeVertexBuffer() {
|
||||
if (g_pVB != NULL) {
|
||||
// Unregister vertex buffer
|
||||
cudaGraphicsUnregisterResource(cuda_VB_resource);
|
||||
getLastCudaError("cudaGraphicsUnregisterResource failed");
|
||||
|
||||
g_pVB->Release();
|
||||
}
|
||||
|
||||
return S_OK;
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Name: Cleanup()
|
||||
// Desc: Releases all previously initialized objects
|
||||
//-----------------------------------------------------------------------------
|
||||
VOID Cleanup() {
|
||||
FreeVertexBuffer();
|
||||
|
||||
if (g_pD3DDevice != NULL) {
|
||||
g_pD3DDevice->Release();
|
||||
}
|
||||
|
||||
if (g_pD3D != NULL) {
|
||||
g_pD3D->Release();
|
||||
}
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Name: SetupMatrices()
|
||||
// Desc: Sets up the world, view, and projection transform matrices.
|
||||
//-----------------------------------------------------------------------------
|
||||
VOID SetupMatrices() {
|
||||
// For our world matrix, we will just rotate the object about the y-axis.
|
||||
XMFLOAT4X4 matWorldFloat;
|
||||
XMMATRIX matWorld;
|
||||
matWorld = XMMatrixIdentity();
|
||||
XMStoreFloat4x4(&matWorldFloat, matWorld);
|
||||
g_pD3DDevice->SetTransform(D3DTS_WORLD, (D3DMATRIX *)&matWorldFloat);
|
||||
|
||||
// Set up our view matrix. A view matrix can be defined given an eye point,
|
||||
// a point to lookat, and a direction for which way is up. Here, we set the
|
||||
// eye five units back along the z-axis and up three units, look at the
|
||||
// origin, and define "up" to be in the y-direction.
|
||||
XMVECTOR vEyePt = {0.0f, 3.0f, -2.0f};
|
||||
XMVECTOR vLookatPt = {0.0f, 0.0f, 0.0f};
|
||||
XMVECTOR vUpVec = {0.0f, 1.0f, 0.0f};
|
||||
XMMATRIX matView;
|
||||
XMFLOAT4X4 matViewFloat;
|
||||
matView = XMMatrixLookAtLH(vEyePt, vLookatPt, vUpVec);
|
||||
XMStoreFloat4x4(&matViewFloat, matView);
|
||||
g_pD3DDevice->SetTransform(D3DTS_VIEW, (D3DMATRIX *)&matViewFloat);
|
||||
|
||||
// For the projection matrix, we set up a perspective transform (which
|
||||
// transforms geometry from 3D view space to 2D viewport space, with
|
||||
// a perspective divide making objects smaller in the distance). To build
|
||||
// a perpsective transform, we need the field of view (1/4 pi is common),
|
||||
// the aspect ratio, and the near and far clipping planes (which define at
|
||||
// what distances geometry should be no longer be rendered).
|
||||
XMMATRIX matProj;
|
||||
XMFLOAT4X4 matProjFloat;
|
||||
matProj = XMMatrixPerspectiveFovLH((float)XM_PI / 4, 1.0f, 1.0f, 100.0f);
|
||||
XMStoreFloat4x4(&matProjFloat, matProj);
|
||||
g_pD3DDevice->SetTransform(D3DTS_PROJECTION, (D3DMATRIX *)&matProjFloat);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! DeviceLostHandler
|
||||
// - this function handles reseting and initialization of the D3D device
|
||||
// in the event this Device gets Lost
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
HRESULT DeviceLostHandler() {
|
||||
HRESULT hr = S_OK;
|
||||
|
||||
// test the cooperative level to see if it's okay
|
||||
// to render
|
||||
if (FAILED(hr = g_pD3DDevice->TestCooperativeLevel())) {
|
||||
// if the device was truly lost, (i.e., a fullscreen device just lost
|
||||
// focus), wait
|
||||
// until we g_et it back
|
||||
if (hr == D3DERR_DEVICELOST) {
|
||||
return S_OK;
|
||||
}
|
||||
|
||||
// eventually, we will g_et this return value,
|
||||
// indicating that we can now reset the device
|
||||
if (hr == D3DERR_DEVICENOTRESET) {
|
||||
// if we are windowed, read the desktop mode and use the same format for
|
||||
// the back buffer; this effectively turns off color conversion
|
||||
|
||||
if (g_bWindowed) {
|
||||
g_pD3D->GetAdapterDisplayModeEx(g_iAdapter, &g_d3ddm, NULL);
|
||||
g_d3dpp.BackBufferFormat = g_d3ddm.Format;
|
||||
}
|
||||
|
||||
// now try to reset the device
|
||||
if (FAILED(hr = g_pD3DDevice->Reset(&g_d3dpp))) {
|
||||
return hr;
|
||||
} else {
|
||||
// This is a common function we use to restore all hardware
|
||||
// resources/state
|
||||
RestoreContextResources();
|
||||
|
||||
// we have acquired the device
|
||||
g_bDeviceLost = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return hr;
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Name: Render()
|
||||
// Desc: Draws the scene
|
||||
//-----------------------------------------------------------------------------
|
||||
HRESULT Render() {
|
||||
HRESULT hr = S_OK;
|
||||
|
||||
// Begin code to handle case where the D3D gets lost
|
||||
if (g_bDeviceLost) {
|
||||
if (FAILED(hr = DeviceLostHandler())) {
|
||||
fprintf(stderr, "DeviceLostHandler FAILED returned %08x\n", hr);
|
||||
return hr;
|
||||
}
|
||||
|
||||
fprintf(stderr, "Render DeviceLost handler\n");
|
||||
|
||||
// test the cooperative level to see if it's okay
|
||||
// to render
|
||||
if (FAILED(hr = g_pD3DDevice->TestCooperativeLevel())) {
|
||||
fprintf(stderr,
|
||||
"TestCooperativeLevel = %08x failed, will attempt to reset\n",
|
||||
hr);
|
||||
|
||||
// if the device was truly lost, (i.e., a fullscreen device just lost
|
||||
// focus), wait
|
||||
// until we g_et it back
|
||||
|
||||
if (hr == D3DERR_DEVICELOST) {
|
||||
fprintf(
|
||||
stderr,
|
||||
"TestCooperativeLevel = %08x DeviceLost, will retry next call\n",
|
||||
hr);
|
||||
return S_OK;
|
||||
}
|
||||
|
||||
// eventually, we will g_et this return value,
|
||||
// indicating that we can now reset the device
|
||||
if (hr == D3DERR_DEVICENOTRESET) {
|
||||
fprintf(stderr,
|
||||
"TestCooperativeLevel = %08x will try to RESET the device\n",
|
||||
hr);
|
||||
// if we are windowed, read the desktop mode and use the same format for
|
||||
// the back buffer; this effectively turns off color conversion
|
||||
|
||||
if (g_bWindowed) {
|
||||
g_pD3D->GetAdapterDisplayModeEx(g_iAdapter, &g_d3ddm, NULL);
|
||||
g_d3dpp.BackBufferFormat = g_d3ddm.Format;
|
||||
}
|
||||
|
||||
// now try to reset the device
|
||||
if (FAILED(hr = g_pD3DDevice->Reset(&g_d3dpp))) {
|
||||
fprintf(stderr, "TestCooperativeLevel = %08x RESET device FAILED\n",
|
||||
hr);
|
||||
return hr;
|
||||
} else {
|
||||
fprintf(stderr, "TestCooperativeLevel = %08x RESET device SUCCESS!\n",
|
||||
hr);
|
||||
|
||||
// Reinitialize D3D9 resources, CUDA resources/contexts
|
||||
InitCUDA();
|
||||
InitVertexBuffer();
|
||||
InitD3D9RenderState();
|
||||
|
||||
fprintf(stderr, "TestCooperativeLevel = %08x INIT device SUCCESS!\n",
|
||||
hr);
|
||||
|
||||
// we have acquired the device
|
||||
g_bDeviceLost = false;
|
||||
}
|
||||
}
|
||||
|
||||
return hr;
|
||||
}
|
||||
}
|
||||
|
||||
if (!g_bDeviceLost) {
|
||||
// Clear the backbuffer to a black color
|
||||
g_pD3DDevice->Clear(0, NULL, D3DCLEAR_TARGET, D3DCOLOR_XRGB(0, 0, 0), 1.0f,
|
||||
0);
|
||||
|
||||
// Run CUDA to update vertex positions
|
||||
runCuda();
|
||||
|
||||
// Begin the scene
|
||||
if (SUCCEEDED(g_pD3DDevice->BeginScene())) {
|
||||
// Setup the world, view, and projection matrices
|
||||
SetupMatrices();
|
||||
|
||||
// Render the vertex buffer contents
|
||||
g_pD3DDevice->SetStreamSource(0, g_pVB, 0, sizeof(CUSTOMVERTEX));
|
||||
g_pD3DDevice->SetFVF(D3DFVF_CUSTOMVERTEX);
|
||||
g_pD3DDevice->DrawPrimitive(D3DPT_POINTLIST, 0, g_NumVertices);
|
||||
|
||||
// End the scene
|
||||
g_pD3DDevice->EndScene();
|
||||
}
|
||||
|
||||
// Present the backbuffer contents to the display
|
||||
hr = g_pD3DDevice->Present(NULL, NULL, NULL, NULL);
|
||||
|
||||
if (hr == D3DERR_DEVICELOST) {
|
||||
fprintf(stderr, "drawScene Present = %08x detected D3D DeviceLost\n", hr);
|
||||
g_bDeviceLost = true;
|
||||
|
||||
FreeVertexBuffer();
|
||||
}
|
||||
}
|
||||
|
||||
anim += 0.1f;
|
||||
|
||||
return hr;
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Name: MsgProc()
|
||||
// Desc: The window's message handler
|
||||
//-----------------------------------------------------------------------------
|
||||
LRESULT WINAPI MsgProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam) {
|
||||
switch (msg) {
|
||||
case WM_DESTROY:
|
||||
case WM_KEYDOWN:
|
||||
if (msg != WM_KEYDOWN || wParam == 27) {
|
||||
Cleanup();
|
||||
|
||||
PostQuitMessage(0);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
return DefWindowProc(hWnd, msg, wParam, lParam);
|
||||
}
|
Binary file not shown.
@ -1,79 +0,0 @@
|
||||
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
// This example demonstrates how to use the CUDA Direct3D bindings with the
|
||||
// runtime API.
|
||||
// Device code.
|
||||
|
||||
#ifndef _SIMPLED3D_KERNEL_CU_
|
||||
#define _SIMPLED3D_KERNEL_CU_
|
||||
|
||||
// includes, C string library
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
//! Simple kernel to modify vertex positions in sine wave pattern
|
||||
//! @param pos pos in global memory
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
__global__ void kernel(float4 *pos, unsigned int width, unsigned int height,
|
||||
float time) {
|
||||
unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
|
||||
// calculate uv coordinates
|
||||
float u = x / (float)width;
|
||||
float v = y / (float)height;
|
||||
u = u * 2.0f - 1.0f;
|
||||
v = v * 2.0f - 1.0f;
|
||||
|
||||
// calculate simple sine wave pattern
|
||||
float freq = 4.0f;
|
||||
float w = sinf(u * freq + time) * cosf(v * freq + time) * 0.5f;
|
||||
|
||||
// write output vertex
|
||||
pos[y * width + x] = make_float4(u, w, v, __int_as_float(0xff00ff00));
|
||||
}
|
||||
|
||||
extern "C" void simpleD3DKernel(float4 *pos, unsigned int width,
|
||||
unsigned int height, float time) {
|
||||
cudaError_t error = cudaSuccess;
|
||||
|
||||
dim3 block(8, 8, 1);
|
||||
dim3 grid(width / block.x, height / block.y, 1);
|
||||
|
||||
kernel<<<grid, block>>>(pos, width, height, time);
|
||||
|
||||
error = cudaGetLastError();
|
||||
|
||||
if (error != cudaSuccess) {
|
||||
printf("kernel() failed to launch error = %d\n", error);
|
||||
}
|
||||
}
|
||||
|
||||
#endif // #ifndef _SIMPLED3D_KERNEL_CU_
|
@ -1,46 +0,0 @@
|
||||
################################################################################
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met:
|
||||
# * Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
# contributors may be used to endorse or promote products derived
|
||||
# from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
################################################################################
|
||||
#
|
||||
# Makefile project only supported on Mac OS X and Linux Platforms)
|
||||
#
|
||||
################################################################################
|
||||
|
||||
# Target rules
|
||||
all: build
|
||||
|
||||
build:
|
||||
$(info >>> WARNING - simpleD3D9Texture is not supported on Linux - waiving sample <<<)
|
||||
|
||||
run: build
|
||||
|
||||
testrun: build
|
||||
|
||||
clean:
|
||||
|
||||
clobber: clean
|
@ -1,49 +0,0 @@
|
||||
# simpleD3D9Texture - Simple D3D9 Texture
|
||||
|
||||
## Description
|
||||
|
||||
Simple program which demonstrates Direct3D9 Texture interoperability with CUDA. The program creates a number of D3D9 Textures (2D, 3D, and CubeMap) which are written to from CUDA kernels. Direct3D then renders the results on the screen. A Direct3D capable device is required.
|
||||
|
||||
## Key Concepts
|
||||
|
||||
Graphics Interop, Texture
|
||||
|
||||
## Supported SM Architectures
|
||||
|
||||
[SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 8.9 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
|
||||
## Supported OSes
|
||||
|
||||
Windows
|
||||
|
||||
## Supported CPU Architecture
|
||||
|
||||
x86_64
|
||||
|
||||
## CUDA APIs involved
|
||||
|
||||
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
|
||||
cudaGraphicsUnmapResources, cudaMalloc, cudaMallocPitch, cudaFree, cudaGetLastError, cudaGraphicsMapResources, cudaMemset, cudaGraphicsUnregisterResource, cudaGraphicsSubResourceGetMappedArray
|
||||
|
||||
## Dependencies needed to build/run
|
||||
[DirectX](../../../README.md#directx)
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
|
||||
|
||||
## Build and Run
|
||||
|
||||
### Windows
|
||||
The Windows samples are built using the Visual Studio IDE. Solution files (.sln) are provided for each supported version of Visual Studio, using the format:
|
||||
```
|
||||
*_vs<version>.sln - for Visual Studio <version>
|
||||
```
|
||||
Each individual sample has its own set of solution files in its directory:
|
||||
|
||||
To build/examine all the samples at once, the complete solution files should be used. To build/examine a single sample, the individual sample solution files should be used.
|
||||
> **Note:** Some samples require that the Microsoft DirectX SDK (June 2010 or newer) be installed and that the VC++ directory paths are properly set up (**Tools > Options...**). Check DirectX Dependencies section for details."
|
||||
|
||||
## References (for more details)
|
||||
|
Binary file not shown.
Binary file not shown.
Before Width: | Height: | Size: 172 KiB |
Binary file not shown.
Before Width: | Height: | Size: 67 KiB |
Binary file not shown.
Before Width: | Height: | Size: 26 KiB |
@ -1,884 +0,0 @@
|
||||
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* This example demonstrates how to use the CUDA Direct3D bindings to
|
||||
* transfer data between CUDA and DX9 2D, CubeMap, and Volume Textures.
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
|
||||
#define WINDOWS_LEAN_AND_MEAN
|
||||
#include <windows.h>
|
||||
#endif
|
||||
|
||||
// This header inclues all the necessary D3D10 and CUDA includes
|
||||
#include <cuda_runtime_api.h>
|
||||
#include <cuda_d3d9_interop.h>
|
||||
|
||||
// includes, project
|
||||
#include <rendercheck_d3d9.h>
|
||||
#include <helper_cuda.h>
|
||||
#include <helper_functions.h> // includes cuda.h and cuda_runtime_api.h
|
||||
|
||||
#include <cassert>
|
||||
|
||||
#define MAX_EPSILON 10
|
||||
|
||||
static char *SDK_name = "simpleD3D9Texture";
|
||||
|
||||
bool g_bDone = false;
|
||||
bool g_bPassed = true;
|
||||
IDirect3D9Ex *g_pD3D; // Used to create the D3DDevice
|
||||
unsigned int g_iAdapter;
|
||||
IDirect3DDevice9Ex *g_pD3DDevice;
|
||||
|
||||
D3DDISPLAYMODEEX g_d3ddm;
|
||||
D3DPRESENT_PARAMETERS g_d3dpp;
|
||||
|
||||
bool g_bWindowed = true;
|
||||
bool g_bDeviceLost = false;
|
||||
|
||||
const unsigned int g_WindowWidth = 720;
|
||||
const unsigned int g_WindowHeight = 720;
|
||||
|
||||
int g_iFrameToCompare = 10;
|
||||
|
||||
int *pArgc = NULL;
|
||||
char **pArgv = NULL;
|
||||
|
||||
// Data structure for 2D texture shared between DX9 and CUDA
|
||||
struct {
|
||||
IDirect3DTexture9 *pTexture;
|
||||
cudaGraphicsResource *cudaResource;
|
||||
void *cudaLinearMemory;
|
||||
size_t pitch;
|
||||
int width;
|
||||
int height;
|
||||
} g_texture_2d;
|
||||
|
||||
// Data structure for cube texture shared between DX9 and CUDA
|
||||
struct {
|
||||
IDirect3DCubeTexture9 *pTexture;
|
||||
cudaGraphicsResource *cudaResource;
|
||||
void *cudaLinearMemory;
|
||||
size_t pitch;
|
||||
int size;
|
||||
} g_texture_cube;
|
||||
|
||||
// Data structure for volume textures shared between DX9 and CUDA
|
||||
struct {
|
||||
IDirect3DVolumeTexture9 *pTexture;
|
||||
cudaGraphicsResource *cudaResource;
|
||||
void *cudaLinearMemory;
|
||||
size_t pitch;
|
||||
int width;
|
||||
int height;
|
||||
int depth;
|
||||
} g_texture_vol;
|
||||
|
||||
// The CUDA kernel launchers that get called
|
||||
extern "C" {
|
||||
bool cuda_texture_2d(void *surface, size_t width, size_t height, size_t pitch,
|
||||
float t);
|
||||
bool cuda_texture_cube(void *surface, int width, int height, size_t pitch,
|
||||
int face, float t);
|
||||
bool cuda_texture_volume(void *surface, int width, int height, int depth,
|
||||
size_t pitch, size_t pitchslice, float t);
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Forward declarations
|
||||
//-----------------------------------------------------------------------------
|
||||
HRESULT InitD3D9(HWND hWnd);
|
||||
HRESULT InitCUDA();
|
||||
HRESULT InitTextures();
|
||||
HRESULT ReleaseTextures();
|
||||
HRESULT RegisterD3D9ResourceWithCUDA();
|
||||
HRESULT DeviceLostHandler();
|
||||
|
||||
void RunKernels();
|
||||
HRESULT DrawScene();
|
||||
void Cleanup();
|
||||
void RunCUDA();
|
||||
|
||||
LRESULT WINAPI MsgProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam);
|
||||
|
||||
#define NAME_LEN 512
|
||||
|
||||
char device_name[NAME_LEN];
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Program main
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
int main(int argc, char *argv[]) {
|
||||
char *ref_file = NULL;
|
||||
|
||||
pArgc = &argc;
|
||||
pArgv = argv;
|
||||
|
||||
printf("[%s] - Starting...\n", SDK_name);
|
||||
|
||||
// command line options
|
||||
if (argc > 1) {
|
||||
// automatied build testing harness
|
||||
if (checkCmdLineFlag(argc, (const char **)argv, "file"))
|
||||
getCmdLineArgumentString(argc, (const char **)argv, "file", &ref_file);
|
||||
}
|
||||
|
||||
//
|
||||
// create window
|
||||
//
|
||||
// Register the window class
|
||||
#if 1
|
||||
WNDCLASSEX wc = {sizeof(WNDCLASSEX), CS_CLASSDC, MsgProc, 0L, 0L,
|
||||
GetModuleHandle(NULL), NULL, NULL, NULL, NULL,
|
||||
"CUDA/D3D9 Texture InterOP", NULL};
|
||||
RegisterClassEx(&wc);
|
||||
|
||||
int xBorder = ::GetSystemMetrics(SM_CXSIZEFRAME);
|
||||
int yMenu = ::GetSystemMetrics(SM_CYMENU);
|
||||
int yBorder = ::GetSystemMetrics(SM_CYSIZEFRAME);
|
||||
|
||||
// Create the application's window (padding by window border for uniform BB
|
||||
// sizes across OSs)
|
||||
HWND hWnd = CreateWindow(
|
||||
wc.lpszClassName, "CUDA/D3D9 Texture InterOP", WS_OVERLAPPEDWINDOW, 0, 0,
|
||||
g_WindowWidth + 2 * xBorder, g_WindowHeight + 2 * yBorder + yMenu, NULL,
|
||||
NULL, wc.hInstance, NULL);
|
||||
|
||||
#else
|
||||
static WNDCLASSEX wc = {
|
||||
sizeof(WNDCLASSEX), CS_CLASSDC, MsgProc, 0L, 0L,
|
||||
GetModuleHandle(NULL), NULL, NULL, NULL, NULL,
|
||||
"CudaD3D9Tex", NULL};
|
||||
RegisterClassEx(&wc);
|
||||
HWND hWnd = CreateWindow("CudaD3D9Tex", "CUDA D3D9 Texture Interop",
|
||||
WS_OVERLAPPEDWINDOW, 0, 0, 800, 320,
|
||||
GetDesktopWindow(), NULL, wc.hInstance, NULL);
|
||||
#endif
|
||||
|
||||
ShowWindow(hWnd, SW_SHOWDEFAULT);
|
||||
UpdateWindow(hWnd);
|
||||
|
||||
// Initialize Direct3D
|
||||
if (SUCCEEDED(InitD3D9(hWnd)) && SUCCEEDED(InitCUDA()) &&
|
||||
SUCCEEDED(InitTextures())) {
|
||||
if (!g_bDeviceLost) {
|
||||
RegisterD3D9ResourceWithCUDA();
|
||||
}
|
||||
} else {
|
||||
printf("\n");
|
||||
printf(" No CUDA-compatible Direct3D9 device available\n");
|
||||
printf("WAIVED\n");
|
||||
exit(EXIT_WAIVED);
|
||||
}
|
||||
|
||||
//
|
||||
// the main loop
|
||||
//
|
||||
while (false == g_bDone) {
|
||||
RunCUDA();
|
||||
DrawScene();
|
||||
|
||||
//
|
||||
// handle I/O
|
||||
//
|
||||
MSG msg;
|
||||
ZeroMemory(&msg, sizeof(msg));
|
||||
|
||||
while (msg.message != WM_QUIT) {
|
||||
if (PeekMessage(&msg, NULL, 0U, 0U, PM_REMOVE)) {
|
||||
TranslateMessage(&msg);
|
||||
DispatchMessage(&msg);
|
||||
} else {
|
||||
RunCUDA();
|
||||
DrawScene();
|
||||
|
||||
if (ref_file) {
|
||||
for (int count = 0; count < g_iFrameToCompare; count++) {
|
||||
RunCUDA();
|
||||
DrawScene();
|
||||
}
|
||||
|
||||
const char *cur_image_path = "simpleD3D9Texture.ppm";
|
||||
|
||||
// Save a reference of our current test run image
|
||||
CheckRenderD3D9::BackbufferToPPM(g_pD3DDevice, cur_image_path);
|
||||
|
||||
// compare to offical reference image, printing PASS or FAIL.
|
||||
g_bPassed = CheckRenderD3D9::PPMvsPPM(cur_image_path, ref_file,
|
||||
argv[0], MAX_EPSILON, 0.15f);
|
||||
|
||||
g_bDone = true;
|
||||
|
||||
Cleanup();
|
||||
PostQuitMessage(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Unregister windows class
|
||||
UnregisterClass(wc.lpszClassName, wc.hInstance);
|
||||
|
||||
//
|
||||
// and exit
|
||||
//
|
||||
printf("> %s running on %s exiting...\n", SDK_name, device_name);
|
||||
|
||||
exit(g_bPassed ? EXIT_SUCCESS : EXIT_FAILURE);
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Name: InitD3D9()
|
||||
// Desc: Initializes Direct3D9
|
||||
//-----------------------------------------------------------------------------
|
||||
HRESULT InitD3D9(HWND hWnd) {
|
||||
// Create the D3D object.
|
||||
if (S_OK != Direct3DCreate9Ex(D3D_SDK_VERSION, &g_pD3D)) {
|
||||
return E_FAIL;
|
||||
}
|
||||
|
||||
D3DADAPTER_IDENTIFIER9 adapterId;
|
||||
int device;
|
||||
bool bDeviceFound = false;
|
||||
printf("\n");
|
||||
|
||||
cudaError cuStatus;
|
||||
|
||||
for (g_iAdapter = 0; g_iAdapter < g_pD3D->GetAdapterCount(); g_iAdapter++) {
|
||||
HRESULT hr = g_pD3D->GetAdapterIdentifier(g_iAdapter, 0, &adapterId);
|
||||
|
||||
if (FAILED(hr)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
cuStatus = cudaD3D9GetDevice(&device, adapterId.DeviceName);
|
||||
// This prints and resets the cudaError to cudaSuccess
|
||||
printLastCudaError("cudaD3D9GetDevice failed");
|
||||
|
||||
printf("> Display Device #%d: \"%s\" %s Direct3D9\n", g_iAdapter,
|
||||
adapterId.Description,
|
||||
(cuStatus == cudaSuccess) ? "supports" : "does not support");
|
||||
|
||||
if (cudaSuccess == cuStatus) {
|
||||
bDeviceFound = true;
|
||||
STRCPY(device_name, NAME_LEN, adapterId.Description);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// we check to make sure we have found a cuda-compatible D3D device to work on
|
||||
if (!bDeviceFound) {
|
||||
printf("\n");
|
||||
printf(" No CUDA-compatible Direct3D9 device available\n");
|
||||
printf("PASSED\n");
|
||||
// destroy the D3D device
|
||||
g_pD3D->Release();
|
||||
exit(EXIT_SUCCESS);
|
||||
}
|
||||
|
||||
// Create the D3D Display Device
|
||||
RECT rc;
|
||||
GetClientRect(hWnd, &rc);
|
||||
D3DDISPLAYMODE d3ddm;
|
||||
g_pD3D->GetAdapterDisplayMode(g_iAdapter, &d3ddm);
|
||||
D3DPRESENT_PARAMETERS d3dpp;
|
||||
ZeroMemory(&d3dpp, sizeof(d3dpp));
|
||||
d3dpp.Windowed = TRUE;
|
||||
d3dpp.BackBufferCount = 1;
|
||||
d3dpp.SwapEffect = D3DSWAPEFFECT_DISCARD;
|
||||
d3dpp.hDeviceWindow = hWnd;
|
||||
// d3dpp.BackBufferWidth = g_bQAReadback?g_WindowWidth:(rc.right - rc.left);
|
||||
// d3dpp.BackBufferHeight = g_bQAReadback?g_WindowHeight:(rc.bottom - rc.top);
|
||||
d3dpp.BackBufferWidth = g_WindowWidth;
|
||||
d3dpp.BackBufferHeight = g_WindowHeight;
|
||||
|
||||
d3dpp.BackBufferFormat = d3ddm.Format;
|
||||
|
||||
if (FAILED(g_pD3D->CreateDeviceEx(g_iAdapter, D3DDEVTYPE_HAL, hWnd,
|
||||
D3DCREATE_HARDWARE_VERTEXPROCESSING, &d3dpp,
|
||||
NULL, &g_pD3DDevice))) {
|
||||
return E_FAIL;
|
||||
}
|
||||
|
||||
// We clear the back buffer
|
||||
g_pD3DDevice->BeginScene();
|
||||
g_pD3DDevice->Clear(0, NULL, D3DCLEAR_TARGET, 0, 1.0f, 0);
|
||||
g_pD3DDevice->EndScene();
|
||||
|
||||
return S_OK;
|
||||
}
|
||||
|
||||
HRESULT InitCUDA() {
|
||||
printf("InitCUDA() g_pD3DDevice = %p\n", g_pD3DDevice);
|
||||
|
||||
// Now we need to bind a CUDA context to the DX9 device
|
||||
// This is the CUDA 2.0 DX9 interface (required for Windows XP and Vista)
|
||||
cudaD3D9SetDirect3DDevice(g_pD3DDevice);
|
||||
getLastCudaError("cudaD3D9SetDirect3DDevice failed");
|
||||
|
||||
return S_OK;
|
||||
}
|
||||
|
||||
HRESULT RegisterD3D9ResourceWithCUDA() {
|
||||
// 2D
|
||||
// register the Direct3D resources that we'll use
|
||||
// we'll read to and write from g_texture_2d, so don't set any special map
|
||||
// flags for it
|
||||
cudaGraphicsD3D9RegisterResource(&g_texture_2d.cudaResource,
|
||||
g_texture_2d.pTexture,
|
||||
cudaGraphicsRegisterFlagsNone);
|
||||
getLastCudaError("cudaGraphicsD3D9RegisterResource (g_texture_2d) failed");
|
||||
// cuda cannot write into the texture directly : the texture is seen as a
|
||||
// cudaArray and can only be mapped as a texture
|
||||
// Create a buffer so that cuda can write into it
|
||||
// pixel fmt is DXGI_FORMAT_R32G32B32A32_FLOAT
|
||||
cudaMallocPitch(&g_texture_2d.cudaLinearMemory, &g_texture_2d.pitch,
|
||||
g_texture_2d.width * sizeof(float) * 4, g_texture_2d.height);
|
||||
getLastCudaError("cudaMallocPitch (g_texture_2d) failed");
|
||||
cudaMemset(g_texture_2d.cudaLinearMemory, 1,
|
||||
g_texture_2d.pitch * g_texture_2d.height);
|
||||
|
||||
// CUBE
|
||||
cudaGraphicsD3D9RegisterResource(&g_texture_cube.cudaResource,
|
||||
g_texture_cube.pTexture,
|
||||
cudaGraphicsRegisterFlagsNone);
|
||||
getLastCudaError("cudaGraphicsD3D9RegisterResource (g_texture_cube) failed");
|
||||
// create the buffer. pixel fmt is DXGI_FORMAT_R8G8B8A8_SNORM
|
||||
cudaMallocPitch(&g_texture_cube.cudaLinearMemory, &g_texture_cube.pitch,
|
||||
g_texture_cube.size * 4, g_texture_cube.size);
|
||||
getLastCudaError("cudaMallocPitch (g_texture_cube) failed");
|
||||
cudaMemset(g_texture_cube.cudaLinearMemory, 1,
|
||||
g_texture_cube.pitch * g_texture_cube.size);
|
||||
getLastCudaError("cudaMemset (g_texture_cube) failed");
|
||||
|
||||
// 3D
|
||||
cudaGraphicsD3D9RegisterResource(&g_texture_vol.cudaResource,
|
||||
g_texture_vol.pTexture,
|
||||
cudaGraphicsRegisterFlagsNone);
|
||||
getLastCudaError("cudaGraphicsD3D9RegisterResource (g_texture_vol) failed");
|
||||
// create the buffer. pixel fmt is DXGI_FORMAT_R8G8B8A8_SNORM
|
||||
// cudaMallocPitch(&g_texture_vol.cudaLinearMemory, &g_texture_vol.pitch,
|
||||
// g_texture_vol.width * 4, g_texture_vol.height * g_texture_vol.depth);
|
||||
cudaMalloc(
|
||||
&g_texture_vol.cudaLinearMemory,
|
||||
g_texture_vol.width * 4 * g_texture_vol.height * g_texture_vol.depth);
|
||||
g_texture_vol.pitch = g_texture_vol.width * 4;
|
||||
getLastCudaError("cudaMallocPitch (g_texture_vol) failed");
|
||||
cudaMemset(g_texture_vol.cudaLinearMemory, 1,
|
||||
g_texture_vol.pitch * g_texture_vol.height * g_texture_vol.depth);
|
||||
getLastCudaError("cudaMemset (g_texture_vol) failed");
|
||||
|
||||
return S_OK;
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Name: InitTextures()
|
||||
// Desc: Initializes Direct3D Textures (allocation and initialization)
|
||||
//-----------------------------------------------------------------------------
|
||||
HRESULT InitTextures() {
|
||||
//
|
||||
// create the D3D resources we'll be using
|
||||
//
|
||||
|
||||
// 2D texture
|
||||
g_texture_2d.width = 256;
|
||||
g_texture_2d.height = 256;
|
||||
|
||||
if (FAILED(g_pD3DDevice->CreateTexture(
|
||||
g_texture_2d.width, g_texture_2d.height, 1, 0, D3DFMT_A32B32G32R32F,
|
||||
D3DPOOL_DEFAULT, &g_texture_2d.pTexture, NULL))) {
|
||||
return E_FAIL;
|
||||
}
|
||||
|
||||
// cube texture
|
||||
g_texture_cube.size = 64;
|
||||
|
||||
if (FAILED(g_pD3DDevice->CreateCubeTexture(g_texture_cube.size, 1, 0,
|
||||
D3DFMT_A8R8G8B8, D3DPOOL_DEFAULT,
|
||||
&g_texture_cube.pTexture, NULL))) {
|
||||
return E_FAIL;
|
||||
}
|
||||
|
||||
// 3D texture
|
||||
g_texture_vol.width = 64;
|
||||
g_texture_vol.height = 64;
|
||||
g_texture_vol.depth = 32;
|
||||
|
||||
if (FAILED(g_pD3DDevice->CreateVolumeTexture(
|
||||
g_texture_vol.width, g_texture_vol.height, g_texture_vol.depth, 1, 0,
|
||||
D3DFMT_A8R8G8B8, D3DPOOL_DEFAULT, &g_texture_vol.pTexture, NULL))) {
|
||||
return E_FAIL;
|
||||
}
|
||||
|
||||
return S_OK;
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Name: ReleaseTextures()
|
||||
// Desc: Release Direct3D Textures (free-ing)
|
||||
//-----------------------------------------------------------------------------
|
||||
HRESULT ReleaseTextures() {
|
||||
// unregister the Cuda resources
|
||||
cudaGraphicsUnregisterResource(g_texture_2d.cudaResource);
|
||||
getLastCudaError("cudaGraphicsUnregisterResource (g_texture_2d) failed");
|
||||
cudaFree(g_texture_2d.cudaLinearMemory);
|
||||
getLastCudaError("cudaFree (g_texture_2d) failed");
|
||||
|
||||
cudaGraphicsUnregisterResource(g_texture_cube.cudaResource);
|
||||
getLastCudaError("cudaGraphicsUnregisterResource (g_texture_cube) failed");
|
||||
cudaFree(g_texture_cube.cudaLinearMemory);
|
||||
getLastCudaError("cudaFree (g_texture_2d) failed");
|
||||
|
||||
cudaGraphicsUnregisterResource(g_texture_vol.cudaResource);
|
||||
getLastCudaError("cudaGraphicsUnregisterResource (g_texture_vol) failed");
|
||||
cudaFree(g_texture_vol.cudaLinearMemory);
|
||||
getLastCudaError("cudaFree (g_texture_vol) failed");
|
||||
|
||||
//
|
||||
// clean up Direct3D
|
||||
//
|
||||
{
|
||||
// release the resources we created
|
||||
g_texture_2d.pTexture->Release();
|
||||
g_texture_cube.pTexture->Release();
|
||||
g_texture_vol.pTexture->Release();
|
||||
}
|
||||
|
||||
return S_OK;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Run the Cuda part of the computation
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
void RunKernels() {
|
||||
static float t = 0.0f;
|
||||
|
||||
// populate the 2d texture
|
||||
{
|
||||
cudaArray *cuArray;
|
||||
cudaGraphicsSubResourceGetMappedArray(&cuArray, g_texture_2d.cudaResource,
|
||||
0, 0);
|
||||
getLastCudaError(
|
||||
"cudaGraphicsSubResourceGetMappedArray (cuda_texture_2d) failed");
|
||||
|
||||
// kick off the kernel and send the staging buffer cudaLinearMemory as an
|
||||
// argument to allow the kernel to write to it
|
||||
cuda_texture_2d(g_texture_2d.cudaLinearMemory, g_texture_2d.width,
|
||||
g_texture_2d.height, g_texture_2d.pitch, t);
|
||||
getLastCudaError("cuda_texture_2d failed");
|
||||
|
||||
// then we want to copy cudaLinearMemory to the D3D texture, via its mapped
|
||||
// form : cudaArray
|
||||
cudaMemcpy2DToArray(
|
||||
cuArray, // dst array
|
||||
0, 0, // offset
|
||||
g_texture_2d.cudaLinearMemory, g_texture_2d.pitch, // src
|
||||
g_texture_2d.width * 4 * sizeof(float), g_texture_2d.height, // extent
|
||||
cudaMemcpyDeviceToDevice); // kind
|
||||
getLastCudaError("cudaMemcpy2DToArray failed");
|
||||
}
|
||||
// populate the volume texture
|
||||
{
|
||||
size_t pitchSlice = g_texture_vol.pitch * g_texture_vol.height;
|
||||
cudaArray *cuArray;
|
||||
cudaGraphicsSubResourceGetMappedArray(&cuArray, g_texture_vol.cudaResource,
|
||||
0, 0);
|
||||
getLastCudaError(
|
||||
"cudaGraphicsSubResourceGetMappedArray (cuda_texture_3d) failed");
|
||||
|
||||
// kick off the kernel and send the staging buffer cudaLinearMemory as an
|
||||
// argument to allow the kernel to write to it
|
||||
cuda_texture_volume(g_texture_vol.cudaLinearMemory, g_texture_vol.width,
|
||||
g_texture_vol.height, g_texture_vol.depth,
|
||||
g_texture_vol.pitch, pitchSlice, t);
|
||||
getLastCudaError("cuda_texture_3d failed");
|
||||
|
||||
// then we want to copy cudaLinearMemory to the D3D texture, via its mapped
|
||||
// form : cudaArray
|
||||
struct cudaMemcpy3DParms memcpyParams = {0};
|
||||
memcpyParams.dstArray = cuArray;
|
||||
memcpyParams.srcPtr.ptr = g_texture_vol.cudaLinearMemory;
|
||||
memcpyParams.srcPtr.pitch = g_texture_vol.pitch;
|
||||
memcpyParams.srcPtr.xsize = g_texture_vol.width;
|
||||
memcpyParams.srcPtr.ysize = g_texture_vol.height;
|
||||
memcpyParams.extent.width = g_texture_vol.width;
|
||||
memcpyParams.extent.height = g_texture_vol.height;
|
||||
memcpyParams.extent.depth = g_texture_vol.depth;
|
||||
memcpyParams.kind = cudaMemcpyDeviceToDevice;
|
||||
cudaMemcpy3D(&memcpyParams);
|
||||
getLastCudaError("cudaMemcpy3D failed");
|
||||
}
|
||||
|
||||
// populate the faces of the cube map
|
||||
for (int face = 0; face < 6; ++face) {
|
||||
cudaArray *cuArray;
|
||||
cudaGraphicsSubResourceGetMappedArray(&cuArray, g_texture_cube.cudaResource,
|
||||
face, 0);
|
||||
getLastCudaError(
|
||||
"cudaGraphicsSubResourceGetMappedArray (cuda_texture_cube) failed");
|
||||
|
||||
// kick off the kernel and send the staging buffer cudaLinearMemory as an
|
||||
// argument to allow the kernel to write to it
|
||||
cuda_texture_cube(g_texture_cube.cudaLinearMemory, g_texture_cube.size,
|
||||
g_texture_cube.size, g_texture_cube.pitch, face, t);
|
||||
getLastCudaError("cuda_texture_cube failed");
|
||||
|
||||
// then we want to copy cudaLinearMemory to the D3D texture, via its mapped
|
||||
// form : cudaArray
|
||||
cudaMemcpy2DToArray(cuArray, // dst array
|
||||
0, 0, // offset
|
||||
g_texture_cube.cudaLinearMemory,
|
||||
g_texture_cube.pitch, // src
|
||||
g_texture_cube.size * 4, g_texture_cube.size, // extent
|
||||
cudaMemcpyDeviceToDevice); // kind
|
||||
getLastCudaError("cudaMemcpy2DToArray failed");
|
||||
}
|
||||
|
||||
t += 0.1f;
|
||||
}
|
||||
/*{
|
||||
static float t = 0.0f;
|
||||
|
||||
// populate the 2d texture
|
||||
{
|
||||
void* pData;
|
||||
size_t pitch;
|
||||
checkCudaErrorsNoSync ( cudaD3D9ResourceGetMappedPointer(&pData,
|
||||
g_texture_2d.pTexture, 0, 0) );
|
||||
checkCudaErrorsNoSync ( cudaD3D9ResourceGetMappedPitch(&pitch, NULL,
|
||||
g_texture_2d.pTexture, 0, 0) );
|
||||
cuda_texture_2d(pData, g_texture_2d.width, g_texture_2d.height, pitch,
|
||||
t);
|
||||
}
|
||||
|
||||
// populate the faces of the cube map
|
||||
for (int face = 0; face < 6; ++face)
|
||||
{
|
||||
void* pData;
|
||||
size_t pitch;
|
||||
checkCudaErrorsNoSync ( cudaD3D9ResourceGetMappedPointer(&pData,
|
||||
g_texture_cube.pTexture, face, 0) );
|
||||
checkCudaErrorsNoSync ( cudaD3D9ResourceGetMappedPitch(&pitch, NULL,
|
||||
g_texture_cube.pTexture, face, 0) );
|
||||
cuda_texture_cube(pData, g_texture_cube.size, g_texture_cube.size,
|
||||
pitch, face, t);
|
||||
}
|
||||
|
||||
// populate the volume texture
|
||||
{
|
||||
void* pData;
|
||||
size_t pitch;
|
||||
size_t pitchSlice;
|
||||
checkCudaErrorsNoSync ( cudaD3D9ResourceGetMappedPointer(&pData,
|
||||
g_texture_vol.pTexture, 0, 0) );
|
||||
checkCudaErrorsNoSync ( cudaD3D9ResourceGetMappedPitch(&pitch,
|
||||
&pitchSlice, g_texture_vol.pTexture, 0, 0) );
|
||||
cuda_texture_volume(pData, g_texture_vol.width, g_texture_vol.height,
|
||||
g_texture_vol.depth, pitch, pitchSlice);
|
||||
}
|
||||
|
||||
t += 0.1f;
|
||||
}*/
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! RestoreContextResources
|
||||
// - this function restores all of the CUDA/D3D resources and contexts
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
HRESULT RestoreContextResources() {
|
||||
// Reinitialize D3D9 resources, CUDA resources/contexts
|
||||
InitCUDA();
|
||||
InitTextures();
|
||||
RegisterD3D9ResourceWithCUDA();
|
||||
|
||||
return S_OK;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! DeviceLostHandler
|
||||
// - this function handles reseting and initialization of the D3D device
|
||||
// in the event this Device gets Lost
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
HRESULT DeviceLostHandler() {
|
||||
HRESULT hr = S_OK;
|
||||
|
||||
fprintf(stderr, "-> Starting DeviceLostHandler() \n");
|
||||
|
||||
// test the cooperative level to see if it's okay
|
||||
// to render
|
||||
if (FAILED(hr = g_pD3DDevice->TestCooperativeLevel())) {
|
||||
fprintf(stderr,
|
||||
"TestCooperativeLevel = %08x failed, will attempt to reset\n", hr);
|
||||
|
||||
// if the device was truly lost, (i.e., a fullscreen device just lost
|
||||
// focus), wait
|
||||
// until we g_et it back
|
||||
|
||||
if (hr == D3DERR_DEVICELOST) {
|
||||
fprintf(stderr,
|
||||
"TestCooperativeLevel = %08x DeviceLost, will retry next call\n",
|
||||
hr);
|
||||
return S_OK;
|
||||
}
|
||||
|
||||
// eventually, we will g_et this return value,
|
||||
// indicating that we can now reset the device
|
||||
if (hr == D3DERR_DEVICENOTRESET) {
|
||||
fprintf(stderr,
|
||||
"TestCooperativeLevel = %08x will try to RESET the device\n", hr);
|
||||
// if we are windowed, read the desktop mode and use the same format for
|
||||
// the back buffer; this effectively turns off color conversion
|
||||
|
||||
if (g_bWindowed) {
|
||||
g_pD3D->GetAdapterDisplayModeEx(g_iAdapter, &g_d3ddm, NULL);
|
||||
g_d3dpp.BackBufferFormat = g_d3ddm.Format;
|
||||
}
|
||||
|
||||
// now try to reset the device
|
||||
if (FAILED(hr = g_pD3DDevice->Reset(&g_d3dpp))) {
|
||||
fprintf(stderr, "TestCooperativeLevel = %08x RESET device FAILED\n",
|
||||
hr);
|
||||
return hr;
|
||||
} else {
|
||||
fprintf(stderr, "TestCooperativeLevel = %08x RESET device SUCCESS!\n",
|
||||
hr);
|
||||
|
||||
// This is a common function we use to restore all hardware
|
||||
// resources/state
|
||||
RestoreContextResources();
|
||||
|
||||
fprintf(stderr, "TestCooperativeLevel = %08x INIT device SUCCESS!\n",
|
||||
hr);
|
||||
|
||||
// we have acquired the device
|
||||
g_bDeviceLost = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return hr;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Draw the final result on the screen
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
HRESULT DrawScene() {
|
||||
HRESULT hr = S_OK;
|
||||
|
||||
if (g_bDeviceLost) {
|
||||
if (FAILED(hr = DeviceLostHandler())) {
|
||||
fprintf(stderr, "DeviceLostHandler FAILED returned %08x\n", hr);
|
||||
return hr;
|
||||
}
|
||||
}
|
||||
|
||||
if (!g_bDeviceLost) {
|
||||
//
|
||||
// we will use this index and vertex data throughout
|
||||
//
|
||||
unsigned int IB[6] = {
|
||||
0, 1, 2, 0, 2, 3,
|
||||
};
|
||||
struct VertexStruct {
|
||||
float position[3];
|
||||
float texture[3];
|
||||
};
|
||||
|
||||
//
|
||||
// initialize the scene
|
||||
//
|
||||
D3DVIEWPORT9 viewport_window = {0, 0, 672, 192, 0, 1};
|
||||
g_pD3DDevice->SetViewport(&viewport_window);
|
||||
g_pD3DDevice->BeginScene();
|
||||
g_pD3DDevice->Clear(0, NULL, D3DCLEAR_TARGET, 0, 1.0f, 0);
|
||||
g_pD3DDevice->SetRenderState(D3DRS_CULLMODE, D3DCULL_NONE);
|
||||
g_pD3DDevice->SetRenderState(D3DRS_LIGHTING, FALSE);
|
||||
g_pD3DDevice->SetFVF(D3DFVF_XYZ | D3DFVF_TEX1 | D3DFVF_TEXCOORDSIZE3(0));
|
||||
|
||||
//
|
||||
// draw the 2d texture
|
||||
//
|
||||
VertexStruct VB[4] = {
|
||||
{ {-1,-1,0,}, {0,0,0,}, },
|
||||
{ { 1,-1,0,}, {1,0,0,}, },
|
||||
{ { 1, 1,0,}, {1,1,0,}, },
|
||||
{ {-1, 1,0,}, {0,1,0,}, },
|
||||
};
|
||||
D3DVIEWPORT9 viewport = {32, 32, 256, 256, 0, 1};
|
||||
g_pD3DDevice->SetViewport(&viewport);
|
||||
g_pD3DDevice->SetTexture(0, g_texture_2d.pTexture);
|
||||
g_pD3DDevice->DrawIndexedPrimitiveUP(D3DPT_TRIANGLELIST, 0, 4, 2, IB,
|
||||
D3DFMT_INDEX32, VB,
|
||||
sizeof(VertexStruct));
|
||||
|
||||
//
|
||||
// draw the Z-positive side of the cube texture
|
||||
//
|
||||
VertexStruct VB_Zpos[4] = {
|
||||
{ {-1,-1,0,}, {-1,-1, 0.5f,}, },
|
||||
{ { 1,-1,0,}, { 1,-1, 0.5f,}, },
|
||||
{ { 1, 1,0,}, { 1, 1, 0.5f,}, },
|
||||
{ {-1, 1,0,}, {-1, 1, 0.5f,}, },
|
||||
};
|
||||
viewport.Y += viewport.Height + 32;
|
||||
g_pD3DDevice->SetViewport(&viewport);
|
||||
g_pD3DDevice->SetTexture(0, g_texture_cube.pTexture);
|
||||
g_pD3DDevice->DrawIndexedPrimitiveUP(D3DPT_TRIANGLELIST, 0, 4, 2, IB,
|
||||
D3DFMT_INDEX32, VB_Zpos,
|
||||
sizeof(VertexStruct));
|
||||
|
||||
//
|
||||
// draw the Z-negative side of the cube texture
|
||||
//
|
||||
VertexStruct VB_Zneg[4] = {
|
||||
{ {-1,-1,0,}, { 1,-1,-0.5f,}, },
|
||||
{ { 1,-1,0,}, {-1,-1,-0.5f,}, },
|
||||
{ { 1, 1,0,}, {-1, 1,-0.5f,}, },
|
||||
{ {-1, 1,0,}, { 1, 1,-0.5f,}, },
|
||||
};
|
||||
viewport.X += viewport.Width + 32;
|
||||
g_pD3DDevice->SetViewport(&viewport);
|
||||
g_pD3DDevice->SetTexture(0, g_texture_cube.pTexture);
|
||||
g_pD3DDevice->DrawIndexedPrimitiveUP(D3DPT_TRIANGLELIST, 0, 4, 2, IB,
|
||||
D3DFMT_INDEX32, VB_Zneg,
|
||||
sizeof(VertexStruct));
|
||||
|
||||
//
|
||||
// draw a slice the volume texture
|
||||
//
|
||||
VertexStruct VB_Zslice[4] = {
|
||||
{ {-1,-1,0,}, {0,0,0,}, },
|
||||
{ { 1,-1,0,}, {1,0,0,}, },
|
||||
{ { 1, 1,0,}, {1,1,1,}, },
|
||||
{ {-1, 1,0,}, {0,1,1,}, },
|
||||
};
|
||||
viewport.Y -= viewport.Height + 32;
|
||||
g_pD3DDevice->SetViewport(&viewport);
|
||||
g_pD3DDevice->SetTexture(0, g_texture_vol.pTexture);
|
||||
g_pD3DDevice->DrawIndexedPrimitiveUP(D3DPT_TRIANGLELIST, 0, 4, 2, IB,
|
||||
D3DFMT_INDEX32, VB_Zslice,
|
||||
sizeof(VertexStruct));
|
||||
|
||||
//
|
||||
// end the scene
|
||||
//
|
||||
g_pD3DDevice->EndScene();
|
||||
hr = g_pD3DDevice->Present(NULL, NULL, NULL, NULL);
|
||||
|
||||
if (hr == D3DERR_DEVICELOST) {
|
||||
fprintf(stderr, "DrawScene Present = %08x detected D3D DeviceLost\n", hr);
|
||||
g_bDeviceLost = true;
|
||||
|
||||
ReleaseTextures();
|
||||
}
|
||||
}
|
||||
|
||||
return hr;
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Name: Cleanup()
|
||||
// Desc: Releases all previously initialized objects
|
||||
//-----------------------------------------------------------------------------
|
||||
void Cleanup() {
|
||||
ReleaseTextures();
|
||||
|
||||
{
|
||||
// destroy the D3D device
|
||||
g_pD3DDevice->Release();
|
||||
g_pD3D->Release();
|
||||
}
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Name: RunCUDA()
|
||||
// Desc: Launches the CUDA kernels to fill in the texture data
|
||||
//-----------------------------------------------------------------------------
|
||||
void RunCUDA() {
|
||||
//
|
||||
// map the resources we've registered so we can access them in Cuda
|
||||
// - it is most efficient to map and unmap all resources in a single call,
|
||||
// and to have the map/unmap calls be the boundary between using the GPU
|
||||
// for Direct3D and Cuda
|
||||
//
|
||||
|
||||
if (!g_bDeviceLost) {
|
||||
cudaStream_t stream = 0;
|
||||
const int nbResources = 3;
|
||||
cudaGraphicsResource *ppResources[nbResources] = {
|
||||
g_texture_2d.cudaResource, g_texture_vol.cudaResource,
|
||||
g_texture_cube.cudaResource,
|
||||
};
|
||||
cudaGraphicsMapResources(nbResources, ppResources, stream);
|
||||
getLastCudaError("cudaGraphicsMapResources(3) failed");
|
||||
|
||||
//
|
||||
// run kernels which will populate the contents of those textures
|
||||
//
|
||||
RunKernels();
|
||||
|
||||
//
|
||||
// unmap the resources
|
||||
//
|
||||
cudaGraphicsUnmapResources(nbResources, ppResources, stream);
|
||||
getLastCudaError("cudaGraphicsUnmapResources(3) failed");
|
||||
}
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Name: MsgProc()
|
||||
// Desc: The window's message handler
|
||||
//-----------------------------------------------------------------------------
|
||||
static LRESULT WINAPI MsgProc(HWND hWnd, UINT msg, WPARAM wParam,
|
||||
LPARAM lParam) {
|
||||
switch (msg) {
|
||||
case WM_KEYDOWN:
|
||||
if (wParam == VK_ESCAPE) {
|
||||
g_bDone = true;
|
||||
Cleanup();
|
||||
PostQuitMessage(0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
case WM_DESTROY:
|
||||
g_bDone = true;
|
||||
Cleanup();
|
||||
PostQuitMessage(0);
|
||||
return 0;
|
||||
|
||||
case WM_PAINT:
|
||||
ValidateRect(hWnd, NULL);
|
||||
return 0;
|
||||
}
|
||||
|
||||
return DefWindowProc(hWnd, msg, wParam, lParam);
|
||||
}
|
@ -1,78 +0,0 @@
|
||||
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#define PI 3.1415926536f
|
||||
|
||||
/*
|
||||
* Paint a 2D texture with a moving red/green hatch pattern on a
|
||||
* strobing blue background. Note that this kernel reads to and
|
||||
* writes from the texture, hence why this texture was not mapped
|
||||
* as WriteDiscard.
|
||||
*/
|
||||
__global__ void cuda_kernel_texture_2d(unsigned char *surface, int width,
|
||||
int height, size_t pitch, float t) {
|
||||
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
float *pixel;
|
||||
|
||||
// in the case where, due to quantization into grids, we have
|
||||
// more threads than pixels, skip the threads which don't
|
||||
// correspond to valid pixels
|
||||
if (x >= width || y >= height) return;
|
||||
|
||||
// get a pointer to the pixel at (x,y)
|
||||
pixel = (float *)(surface + y * pitch) + 4 * x;
|
||||
|
||||
// populate it
|
||||
float value_x = 0.5f + 0.5f * cos(t + 10.0f * ((2.0f * x) / width - 1.0f));
|
||||
float value_y = 0.5f + 0.5f * cos(t + 10.0f * ((2.0f * y) / height - 1.0f));
|
||||
pixel[0] = 0.5 * pixel[0] + 0.5 * pow(value_x, 3.0f); // red
|
||||
pixel[1] = 0.5 * pixel[1] + 0.5 * pow(value_y, 3.0f); // green
|
||||
pixel[2] = 0.5f + 0.5f * cos(t); // blue
|
||||
pixel[3] = 1; // alpha
|
||||
}
|
||||
|
||||
extern "C" void cuda_texture_2d(void *surface, int width, int height,
|
||||
size_t pitch, float t) {
|
||||
cudaError_t error = cudaSuccess;
|
||||
|
||||
dim3 Db = dim3(16, 16); // block dimensions are fixed to be 256 threads
|
||||
dim3 Dg = dim3((width + Db.x - 1) / Db.x, (height + Db.y - 1) / Db.y);
|
||||
|
||||
cuda_kernel_texture_2d<<<Dg, Db>>>((unsigned char *)surface, width, height,
|
||||
pitch, t);
|
||||
|
||||
error = cudaGetLastError();
|
||||
|
||||
if (error != cudaSuccess) {
|
||||
printf("cuda_kernel_texture_2d() failed to launch error = %d\n", error);
|
||||
}
|
||||
}
|
@ -1,89 +0,0 @@
|
||||
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#define PI 3.1415926536f
|
||||
|
||||
/*
|
||||
* Paint a 2D surface with a moving bulls-eye pattern. The "face" parameter
|
||||
* selects between 6 different colors to use. We will use a different color on
|
||||
* each face of a cube map.
|
||||
*/
|
||||
__global__ void cuda_kernel_texture_cube(char *surface, int width, int height,
|
||||
size_t pitch, int face, float t) {
|
||||
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
unsigned char *pixel;
|
||||
|
||||
// in the case where, due to quantization into grids, we have
|
||||
// more threads than pixels, skip the threads which don't
|
||||
// correspond to valid pixels
|
||||
if (x >= width || y >= height) return;
|
||||
|
||||
// get a pointer to this pixel
|
||||
pixel = (unsigned char *)(surface + y * pitch) + 4 * x;
|
||||
|
||||
// populate it
|
||||
float theta_x = (2.0f * x) / width - 1.0f;
|
||||
float theta_y = (2.0f * y) / height - 1.0f;
|
||||
float theta = 2.0f * PI * sqrt(theta_x * theta_x + theta_y * theta_y);
|
||||
unsigned char value = 255 * (0.6f + 0.4f * cos(theta + t));
|
||||
|
||||
pixel[3] = 255; // alpha
|
||||
|
||||
if (face % 2) {
|
||||
pixel[0] = // blue
|
||||
pixel[1] = // green
|
||||
pixel[2] = 0; // red
|
||||
pixel[face / 2] = value;
|
||||
} else {
|
||||
pixel[0] = // blue
|
||||
pixel[1] = // green
|
||||
pixel[2] = value; // red
|
||||
pixel[face / 2] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void cuda_texture_cube(void *surface, int width, int height,
|
||||
size_t pitch, int face, float t) {
|
||||
cudaError_t error = cudaSuccess;
|
||||
|
||||
dim3 Db = dim3(16, 16); // block dimensions are fixed to be 256 threads
|
||||
dim3 Dg = dim3((width + Db.x - 1) / Db.x, (height + Db.y - 1) / Db.y);
|
||||
|
||||
cuda_kernel_texture_cube<<<Dg, Db>>>((char *)surface, width, height, pitch,
|
||||
face, t);
|
||||
|
||||
error = cudaGetLastError();
|
||||
|
||||
if (error != cudaSuccess) {
|
||||
printf("cuda_kernel_texture_cube() failed to launch error = %d\n", error);
|
||||
}
|
||||
}
|
@ -1,75 +0,0 @@
|
||||
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
/*
|
||||
* Paint a 3D texture with a gradient in X (blue) and Z (green), and have every
|
||||
* other Z slice have full red.
|
||||
*/
|
||||
__global__ void cuda_kernel_texture_volume(unsigned char *surface, int width,
|
||||
int height, int depth, size_t pitch,
|
||||
size_t pitchSlice) {
|
||||
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
|
||||
// in the case where, due to quantization into grids, we have
|
||||
// more threads than pixels, skip the threads which don't
|
||||
// correspond to valid pixels
|
||||
if (x >= width || y >= height) return;
|
||||
|
||||
// walk across the Z slices of this texture. it should be noted that
|
||||
// this is far from optimal data access.
|
||||
for (int z = 0; z < depth; ++z) {
|
||||
// get a pointer to this pixel
|
||||
unsigned char *pixel = surface + z * pitchSlice + y * pitch + 4 * x;
|
||||
pixel[0] = 255 * x / (width - 1); // blue
|
||||
pixel[1] = 255 * z / (depth - 1); // green
|
||||
pixel[2] = 255 * (z % 2); // red
|
||||
pixel[3] = 255; // alpha
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void cuda_texture_volume(void *surface, int width, int height,
|
||||
int depth, size_t pitch, size_t pitchSlice,
|
||||
float t) {
|
||||
cudaError_t error = cudaSuccess;
|
||||
|
||||
dim3 Db = dim3(16, 16); // block dimensions are fixed to be 256 threads
|
||||
dim3 Dg = dim3((width + Db.x - 1) / Db.x, (height + Db.y - 1) / Db.y);
|
||||
|
||||
cuda_kernel_texture_volume<<<Dg, Db>>>((unsigned char *)surface, width,
|
||||
height, depth, pitch, pitchSlice);
|
||||
|
||||
error = cudaGetLastError();
|
||||
|
||||
if (error != cudaSuccess) {
|
||||
printf("cuda_kernel_texture_volume() failed to launch error = %d\n", error);
|
||||
}
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user