mirror of
				https://github.com/NVIDIA/cuda-samples.git
				synced 2025-11-04 15:47:50 +08:00 
			
		
		
		
	Remove legacy Direct3D 9 and 10 interoperability samples
This commit is contained in:
		
							parent
							
								
									37922e6429
								
							
						
					
					
						commit
						035dcfd357
					
				@ -15,6 +15,15 @@
 | 
			
		||||
        * `cuHook` demonstrating dlsym hooks. (reason: incompatible with modern `glibc`)
 | 
			
		||||
    * `4_CUDA_Libraries`
 | 
			
		||||
        * `batchedLabelMarkersAndLabelCompressionNPP` demonstrating NPP features (reason: some functionality removed from library)
 | 
			
		||||
    * `5_Domain_Specific`
 | 
			
		||||
        * Legacy Direct3D 9 and 10 interoperability samples:
 | 
			
		||||
            * `simpleD3D10`
 | 
			
		||||
            * `simpleD3D10RenderTarget`
 | 
			
		||||
            * `simpleD3D10Texture`
 | 
			
		||||
            * `simpleD3D9`
 | 
			
		||||
            * `simpleD3D9Texture`
 | 
			
		||||
            * `SLID3D10Texture`
 | 
			
		||||
            * `VFlockingD3D10`
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -1,46 +0,0 @@
 | 
			
		||||
################################################################################
 | 
			
		||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
 | 
			
		||||
#
 | 
			
		||||
# Redistribution and use in source and binary forms, with or without
 | 
			
		||||
# modification, are permitted provided that the following conditions
 | 
			
		||||
# are met:
 | 
			
		||||
#  * Redistributions of source code must retain the above copyright
 | 
			
		||||
#    notice, this list of conditions and the following disclaimer.
 | 
			
		||||
#  * Redistributions in binary form must reproduce the above copyright
 | 
			
		||||
#    notice, this list of conditions and the following disclaimer in the
 | 
			
		||||
#    documentation and/or other materials provided with the distribution.
 | 
			
		||||
#  * Neither the name of NVIDIA CORPORATION nor the names of its
 | 
			
		||||
#    contributors may be used to endorse or promote products derived
 | 
			
		||||
#    from this software without specific prior written permission.
 | 
			
		||||
#
 | 
			
		||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 | 
			
		||||
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 | 
			
		||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 | 
			
		||||
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 | 
			
		||||
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 | 
			
		||||
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 | 
			
		||||
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 | 
			
		||||
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 | 
			
		||||
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 | 
			
		||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 | 
			
		||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
			
		||||
#
 | 
			
		||||
################################################################################
 | 
			
		||||
#
 | 
			
		||||
# Makefile project only supported on Mac OS X and Linux platforms
 | 
			
		||||
#
 | 
			
		||||
################################################################################
 | 
			
		||||
 | 
			
		||||
# Target rules
 | 
			
		||||
# None of the targets below produce a file of the same name. Declaring them
# phony keeps a stray file called e.g. "build" or "clean" from masking a rule.
.PHONY: all build run testrun clean clobber

all: build

# This Makefile does not compile the sample; "build" only reports that the
# sample is waived on this platform.
build:
	$(info >>> WARNING - SLID3D10Texture is not supported on Linux - waiving sample <<<)

run: build

testrun: build

clean:

clobber: clean
 | 
			
		||||
@ -1,52 +0,0 @@
 | 
			
		||||
# SLID3D10Texture - SLI D3D10 Texture
 | 
			
		||||
 | 
			
		||||
## Description
 | 
			
		||||
 | 
			
		||||
Simple program which demonstrates SLI with Direct3D10 Texture interoperability with CUDA.  The program creates a D3D10 Texture which is written to from a CUDA kernel. Direct3D then renders the results on the screen.  A Direct3D Capable device is required.
 | 
			
		||||
 | 
			
		||||
## Key Concepts
 | 
			
		||||
 | 
			
		||||
Performance Strategies, Graphics Interop, Image Processing, 2D Textures
 | 
			
		||||
 | 
			
		||||
## Supported SM Architectures
 | 
			
		||||
 | 
			
		||||
[SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.9 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 | 
			
		||||
 | 
			
		||||
## Supported OSes
 | 
			
		||||
 | 
			
		||||
Windows
 | 
			
		||||
 | 
			
		||||
## Supported CPU Architecture
 | 
			
		||||
 | 
			
		||||
x86_64
 | 
			
		||||
 | 
			
		||||
## CUDA APIs involved
 | 
			
		||||
 | 
			
		||||
### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
 | 
			
		||||
cuCtxPushCurrent, cuCtxPopCurrent
 | 
			
		||||
 | 
			
		||||
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
 | 
			
		||||
cudaGraphicsUnmapResources, cudaMalloc, cudaMallocPitch, cudaGetErrorString, cudaFree, cudaGraphicsResourceSetMapFlags, cudaGetLastError, cudaGraphicsMapResources, cudaGetDeviceCount, cudaMemset, cudaGraphicsUnregisterResource, cudaGraphicsSubResourceGetMappedArray, cudaGetDeviceProperties
 | 
			
		||||
 | 
			
		||||
## Dependencies needed to build/run
 | 
			
		||||
[DirectX](../../../README.md#directx)
 | 
			
		||||
 | 
			
		||||
## Prerequisites
 | 
			
		||||
 | 
			
		||||
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 | 
			
		||||
Make sure the dependencies mentioned in the [Dependencies](#dependencies-needed-to-buildrun) section above are installed.
 | 
			
		||||
 | 
			
		||||
## Build and Run
 | 
			
		||||
 | 
			
		||||
### Windows
 | 
			
		||||
The Windows samples are built using the Visual Studio IDE. Solution files (.sln) are provided for each supported version of Visual Studio, using the format:
 | 
			
		||||
```
 | 
			
		||||
*_vs<version>.sln - for Visual Studio <version>
 | 
			
		||||
```
 | 
			
		||||
Each individual sample has its own set of solution files in its directory:
 | 
			
		||||
 | 
			
		||||
To build/examine all the samples at once, the complete solution files should be used. To build/examine a single sample, the individual sample solution files should be used.
 | 
			
		||||
> **Note:** Some samples require that the Microsoft DirectX SDK (June 2010 or newer) be installed and that the VC++ directory paths are properly set up (**Tools > Options...**). Check the DirectX Dependencies section for details.
 | 
			
		||||
 | 
			
		||||
## References (for more details)
 | 
			
		||||
 | 
			
		||||
@ -1,844 +0,0 @@
 | 
			
		||||
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
 | 
			
		||||
 *
 | 
			
		||||
 * Redistribution and use in source and binary forms, with or without
 | 
			
		||||
 * modification, are permitted provided that the following conditions
 | 
			
		||||
 * are met:
 | 
			
		||||
 *  * Redistributions of source code must retain the above copyright
 | 
			
		||||
 *    notice, this list of conditions and the following disclaimer.
 | 
			
		||||
 *  * Redistributions in binary form must reproduce the above copyright
 | 
			
		||||
 *    notice, this list of conditions and the following disclaimer in the
 | 
			
		||||
 *    documentation and/or other materials provided with the distribution.
 | 
			
		||||
 *  * Neither the name of NVIDIA CORPORATION nor the names of its
 | 
			
		||||
 *    contributors may be used to endorse or promote products derived
 | 
			
		||||
 *    from this software without specific prior written permission.
 | 
			
		||||
 *
 | 
			
		||||
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 | 
			
		||||
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 | 
			
		||||
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 | 
			
		||||
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 | 
			
		||||
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 | 
			
		||||
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 | 
			
		||||
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 | 
			
		||||
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 | 
			
		||||
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 | 
			
		||||
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 | 
			
		||||
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
//
 | 
			
		||||
// This example demonstrates interoperability of SLI with a Direct3D10 texture
 | 
			
		||||
// and CUDA. The program creates a D3D10 texture which is written from
 | 
			
		||||
// a CUDA kernel. Direct3D then renders the result on the screen.
 | 
			
		||||
// A Direct3D Capable device is required.
 | 
			
		||||
//
 | 
			
		||||
 | 
			
		||||
#pragma warning(disable : 4312)
 | 
			
		||||
 | 
			
		||||
#include <windows.h>
 | 
			
		||||
#include <mmsystem.h>
 | 
			
		||||
 | 
			
		||||
#pragma warning(disable : 4996)  // disable deprecated warning
 | 
			
		||||
#include <strsafe.h>
 | 
			
		||||
#pragma warning(default : 4996)
 | 
			
		||||
 | 
			
		||||
// this header includes all the necessary D3D10 includes
 | 
			
		||||
#include <dynlink_d3d10.h>
 | 
			
		||||
 | 
			
		||||
// includes, cuda
 | 
			
		||||
#include <cuda.h>
 | 
			
		||||
#include <builtin_types.h>
 | 
			
		||||
#include <cuda_runtime.h>
 | 
			
		||||
#include <cuda_d3d10_interop.h>
 | 
			
		||||
#include <d3d10.h>
 | 
			
		||||
 | 
			
		||||
// includes, project
 | 
			
		||||
#include <rendercheck_d3d10.h>  // automated testing
 | 
			
		||||
#include <helper_cuda.h>  // helper for CUDA error checking and initialization
 | 
			
		||||
 | 
			
		||||
#define MAX_EPSILON 10
 | 
			
		||||
 | 
			
		||||
// Sample name printed in the start-up and exit banners.
// Declared const because binding a string literal to a non-const char* is
// ill-formed in standard C++ (it is only accepted as a compiler extension).
static const char *SDK_name = "SLID3D10Texture";
 | 
			
		||||
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
// Global variables
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
IDXGIAdapter *g_pCudaCapableAdapter = NULL;  // Adapter to use
 | 
			
		||||
ID3D10Device *g_pd3dDevice = NULL;           // Our rendering device
 | 
			
		||||
IDXGISwapChain *g_pSwapChain = NULL;         // The swap chain of the window
 | 
			
		||||
ID3D10RenderTargetView *g_pSwapChainRTV =
 | 
			
		||||
    NULL;  // The Render target view on the swap chain ( used for clear)
 | 
			
		||||
ID3D10RasterizerState *g_pRasterState = NULL;
 | 
			
		||||
 | 
			
		||||
ID3D10InputLayout *g_pInputLayout = NULL;
 | 
			
		||||
ID3D10Effect *g_pSimpleEffect = NULL;
 | 
			
		||||
ID3D10EffectTechnique *g_pSimpleTechnique = NULL;
 | 
			
		||||
ID3D10EffectVectorVariable *g_pvQuadRect = NULL;
 | 
			
		||||
ID3D10EffectShaderResourceVariable *g_pTexture2D = NULL;
 | 
			
		||||
 | 
			
		||||
// HLSL effect source, kept in-memory as one concatenated string.
//
//  * VS: derives the texture coordinate of each vertex from SV_VertexID
//    (ids 1, 2 and 3 set x, y and xy to 1) and places the vertex using
//    g_vQuadRect (xy = origin, zw = extent).
//  * PS: samples g_Texture2D, then runs a 1024-iteration sqrt / add / square
//    loop on the red channel before returning the colour.
//    NOTE(review): the loop looks like deliberate extra GPU work -- confirm.
//  * technique10 "Render": a single pass P0 using VS and PS (vs_4_0 / ps_4_0).
//
// Several PS lines carry no trailing "\n"; the pieces still join into valid
// source because every statement ends with ';' or a brace.
static const char g_simpleEffectSrc[] =
    "float4 g_vQuadRect; \n"
    "Texture2D g_Texture2D; \n"
    "\n"
    "SamplerState samLinear{ \n"
    "    Filter = MIN_MAG_LINEAR_MIP_POINT; \n"
    "};\n"
    "\n"
    "struct Fragment{ \n"
    "    float4 Pos : SV_POSITION;\n"
    "    float3 Tex : TEXCOORD0; };\n"
    "\n"
    "Fragment VS( uint vertexId : SV_VertexID )\n"
    "{\n"
    "    Fragment f;\n"
    "    f.Tex = float3( 0.f, 0.f, 0.f); \n"
    "    if (vertexId == 1) f.Tex.x = 1.f; \n"
    "    else if (vertexId == 2) f.Tex.y = 1.f; \n"
    "    else if (vertexId == 3) f.Tex.xy = float2(1.f, 1.f); \n"
    "    \n"
    "    f.Pos = float4( g_vQuadRect.xy + f.Tex * g_vQuadRect.zw, 0, 1);\n"
    "    \n"
    "    return f;\n"
    "}\n"
    "\n"
    "float4 PS( Fragment f ) : SV_Target\n"
    "{\n"
    "    // return g_Texture2D.Sample( samLinear, f.Tex.xy ); \n"
    "    // return float4(f.Tex, 1);\n"
    "    float4 g = g_Texture2D.Sample( samLinear, f.Tex.xy );"
    "    for (int i = 0; i < 1024; ++i) { "
    "        g.x = sqrt(g.x);"
    "        g.x += 0.0001;"
    "        g.x = g.x * g.x;"
    "    }"
    "    return g;\n"
    "}\n"
    "\n"
    "technique10 Render\n"
    "{\n"
    "    pass P0\n"
    "    {\n"
    "        SetVertexShader( CompileShader( vs_4_0, VS() ) );\n"
    "        SetGeometryShader( NULL );\n"
    "        SetPixelShader( CompileShader( ps_4_0, PS() ) );\n"
    "    }\n"
    "}\n"
    "\n";
 | 
			
		||||
 | 
			
		||||
// Testing/tracing helper used throughout this sample. If the condition is
// unsatisfied, print the failing location to stdout and immediately return 1
// from the enclosing function (no cleanup is performed).
//
// The body is wrapped in do { ... } while (0) so the macro expands to exactly
// one statement; "if (c) AssertOrQuit(x); else ..." therefore parses as
// expected. Always terminate the invocation with a semicolon.
//
// NOTE(review): in functions that return HRESULT (e.g. InitD3D) the value 1
// is S_FALSE, which SUCCEEDED() reports as success -- confirm that callers
// cope with this.
#define AssertOrQuit(x)                                                    \
  do {                                                                     \
    if (!(x)) {                                                            \
      fprintf(stdout, "Assert unsatisfied in %s at %s:%d\n", __FUNCTION__, \
              __FILE__, __LINE__);                                         \
      return 1;                                                            \
    }                                                                      \
  } while (0)
 | 
			
		||||
 | 
			
		||||
// Run state. main() keeps rendering while g_bDone is false, and stores the
// outcome of the reference-image comparison in g_bPassed.
bool g_bDone = false;
bool g_bPassed = true;

// Command line as received by main(), published for the rest of the sample.
int *pArgc = NULL;
char **pArgv = NULL;

// Size used for both the window and the swap chain back buffer.
const unsigned int g_WindowWidth = 720;
const unsigned int g_WindowHeight = 720;

// Set when the "file" command line option is present.
bool g_bQAReadback = false;
// Number of additional Render() calls main() issues before the render target
// is captured for comparison.
int g_iFrameToCompare = 1;
 | 
			
		||||
 | 
			
		||||
struct CudaContextData {
 | 
			
		||||
  UINT index;
 | 
			
		||||
  CUcontext context;
 | 
			
		||||
  int deviceOrdinal;
 | 
			
		||||
  cudaGraphicsResource *cudaResource;
 | 
			
		||||
  void *cudaLinearMemory;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
UINT g_ContextCount = 0;
 | 
			
		||||
CudaContextData g_ContextData[32];
 | 
			
		||||
 | 
			
		||||
// Data structure for 2D texture shared between DX10 and CUDA
 | 
			
		||||
struct {
 | 
			
		||||
  ID3D10Texture2D *pTexture;
 | 
			
		||||
  ID3D10ShaderResourceView *pSRView;
 | 
			
		||||
  size_t pitch;
 | 
			
		||||
  int width;
 | 
			
		||||
  int height;
 | 
			
		||||
} g_texture_2d;
 | 
			
		||||
 | 
			
		||||
// The CUDA kernel launchers that get called
 | 
			
		||||
extern "C" {
 | 
			
		||||
bool cuda_texture_2d(void *surface, size_t width, size_t height, size_t pitch,
 | 
			
		||||
                     float t);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
// Forward declarations
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
HRESULT InitD3D(HWND hWnd);
 | 
			
		||||
HRESULT InitTextures();
 | 
			
		||||
 | 
			
		||||
int RunKernels(CudaContextData *currentContextData);
 | 
			
		||||
void DrawScene();
 | 
			
		||||
int Cleanup();
 | 
			
		||||
int Render();
 | 
			
		||||
 | 
			
		||||
LRESULT WINAPI MsgProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam);
 | 
			
		||||
 | 
			
		||||
#define NAME_LEN 512
 | 
			
		||||
 | 
			
		||||
bool findCUDADevice() {
 | 
			
		||||
  int nGraphicsGPU = 0;
 | 
			
		||||
  int deviceCount = 0;
 | 
			
		||||
  bool bFoundGraphics = false;
 | 
			
		||||
  char devname[NAME_LEN];
 | 
			
		||||
 | 
			
		||||
  // This function call returns 0 if there are no CUDA capable devices.
 | 
			
		||||
  cudaError_t error_id = cudaGetDeviceCount(&deviceCount);
 | 
			
		||||
 | 
			
		||||
  if (error_id != cudaSuccess) {
 | 
			
		||||
    printf("cudaGetDeviceCount returned %d\n-> %s\n", (int)error_id,
 | 
			
		||||
           cudaGetErrorString(error_id));
 | 
			
		||||
    exit(EXIT_FAILURE);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (deviceCount == 0) {
 | 
			
		||||
    printf("> There are no device(s) supporting CUDA\n");
 | 
			
		||||
    return false;
 | 
			
		||||
  } else {
 | 
			
		||||
    printf("> Found %d CUDA Capable Device(s)\n", deviceCount);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Get CUDA device properties
 | 
			
		||||
  cudaDeviceProp deviceProp;
 | 
			
		||||
 | 
			
		||||
  for (int dev = 0; dev < deviceCount; ++dev) {
 | 
			
		||||
    cudaGetDeviceProperties(&deviceProp, dev);
 | 
			
		||||
    STRCPY(devname, NAME_LEN, deviceProp.name);
 | 
			
		||||
    printf("> GPU %d: %s\n", dev, devname);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  return true;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool findDXDevice(char *dev_name) {
 | 
			
		||||
  HRESULT hr = S_OK;
 | 
			
		||||
  cudaError cuStatus;
 | 
			
		||||
 | 
			
		||||
  // Iterate through the candidate adapters
 | 
			
		||||
  IDXGIFactory *pFactory;
 | 
			
		||||
  hr = sFnPtr_CreateDXGIFactory(__uuidof(IDXGIFactory), (void **)(&pFactory));
 | 
			
		||||
 | 
			
		||||
  if (!SUCCEEDED(hr)) {
 | 
			
		||||
    printf("> No DXGI Factory created.\n");
 | 
			
		||||
    return false;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  UINT adapter = 0;
 | 
			
		||||
 | 
			
		||||
  for (; !g_pCudaCapableAdapter; ++adapter) {
 | 
			
		||||
    // Get a candidate DXGI adapter
 | 
			
		||||
    IDXGIAdapter *pAdapter = NULL;
 | 
			
		||||
    hr = pFactory->EnumAdapters(adapter, &pAdapter);
 | 
			
		||||
 | 
			
		||||
    if (FAILED(hr)) {
 | 
			
		||||
      break;  // no compatible adapters found
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // Query to see if there exists a corresponding compute device
 | 
			
		||||
    int cuDevice;
 | 
			
		||||
    cuStatus = cudaD3D10GetDevice(&cuDevice, pAdapter);
 | 
			
		||||
    printLastCudaError("cudaD3D10GetDevice failed");  // This prints and resets
 | 
			
		||||
                                                      // the cudaError to
 | 
			
		||||
                                                      // cudaSuccess
 | 
			
		||||
 | 
			
		||||
    if (cudaSuccess == cuStatus) {
 | 
			
		||||
      // If so, mark it as the one against which to create our d3d10 device
 | 
			
		||||
      g_pCudaCapableAdapter = pAdapter;
 | 
			
		||||
      g_pCudaCapableAdapter->AddRef();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    pAdapter->Release();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  printf("> Found %d D3D10 Adapater(s).\n", (int)adapter);
 | 
			
		||||
 | 
			
		||||
  pFactory->Release();
 | 
			
		||||
 | 
			
		||||
  if (!g_pCudaCapableAdapter) {
 | 
			
		||||
    printf("> Found 0 D3D10 Adapater(s) /w Compute capability.\n");
 | 
			
		||||
    return false;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  DXGI_ADAPTER_DESC adapterDesc;
 | 
			
		||||
  g_pCudaCapableAdapter->GetDesc(&adapterDesc);
 | 
			
		||||
  wcstombs_s(NULL, dev_name, 256, adapterDesc.Description, 128);
 | 
			
		||||
 | 
			
		||||
  printf("> Found 1 D3D10 Adapater(s) /w Compute capability.\n");
 | 
			
		||||
  printf("> %s\n", dev_name);
 | 
			
		||||
 | 
			
		||||
  return true;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Program main
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
int main(int argc, char *argv[]) {
 | 
			
		||||
  char device_name[256];
 | 
			
		||||
  char *ref_file = NULL;
 | 
			
		||||
 | 
			
		||||
  pArgc = &argc;
 | 
			
		||||
  pArgv = argv;
 | 
			
		||||
 | 
			
		||||
  printf("%s Starting...\n\n", SDK_name);
 | 
			
		||||
 | 
			
		||||
  if (!findCUDADevice())  // Search for CUDA GPU
 | 
			
		||||
  {
 | 
			
		||||
    printf("> CUDA Device NOT found on \"%s\".. Exiting.\n", device_name);
 | 
			
		||||
    exit(EXIT_SUCCESS);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (!dynlinkLoadD3D10API())  // Search for D3D API (locate drivers, does not
 | 
			
		||||
                               // mean device is found)
 | 
			
		||||
  {
 | 
			
		||||
    printf("> D3D10 API libraries NOT found on.. Exiting.\n");
 | 
			
		||||
    dynlinkUnloadD3D10API();
 | 
			
		||||
    exit(EXIT_SUCCESS);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (!findDXDevice(device_name))  // Search for D3D Hardware Device
 | 
			
		||||
  {
 | 
			
		||||
    printf("> D3D10 Graphics Device NOT found.. Exiting.\n");
 | 
			
		||||
    dynlinkUnloadD3D10API();
 | 
			
		||||
    exit(EXIT_SUCCESS);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // command line options
 | 
			
		||||
  if (checkCmdLineFlag(argc, (const char **)argv, "file")) {
 | 
			
		||||
    g_bQAReadback = true;
 | 
			
		||||
    getCmdLineArgumentString(argc, (const char **)argv, "file", &ref_file);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  //
 | 
			
		||||
  // create window
 | 
			
		||||
  //
 | 
			
		||||
  // Register the window class
 | 
			
		||||
  WNDCLASSEX wc = {sizeof(WNDCLASSEX),
 | 
			
		||||
                   CS_CLASSDC,
 | 
			
		||||
                   MsgProc,
 | 
			
		||||
                   0L,
 | 
			
		||||
                   0L,
 | 
			
		||||
                   GetModuleHandle(NULL),
 | 
			
		||||
                   NULL,
 | 
			
		||||
                   NULL,
 | 
			
		||||
                   NULL,
 | 
			
		||||
                   NULL,
 | 
			
		||||
                   "CUDA SDK",
 | 
			
		||||
                   NULL};
 | 
			
		||||
  RegisterClassEx(&wc);
 | 
			
		||||
 | 
			
		||||
  // Create the application's window
 | 
			
		||||
  HWND hWnd = CreateWindow(wc.lpszClassName, "CUDA-SLI Interop, D3D10",
 | 
			
		||||
                           WS_OVERLAPPEDWINDOW, 0, 0, g_WindowWidth,
 | 
			
		||||
                           g_WindowHeight, NULL, NULL, wc.hInstance, NULL);
 | 
			
		||||
 | 
			
		||||
  ShowWindow(hWnd, SW_SHOWDEFAULT);
 | 
			
		||||
  UpdateWindow(hWnd);
 | 
			
		||||
 | 
			
		||||
  // Initialize Direct3D
 | 
			
		||||
  if (SUCCEEDED(InitD3D(hWnd)) && SUCCEEDED(InitTextures())) {
 | 
			
		||||
    CUresult result = CUDA_SUCCESS;
 | 
			
		||||
    cudaError_t error = cudaSuccess;
 | 
			
		||||
 | 
			
		||||
    // get the list of interop devices
 | 
			
		||||
    {
 | 
			
		||||
      unsigned int interopDeviceCount = 0;
 | 
			
		||||
      int interopDevices[32];
 | 
			
		||||
      error = cudaD3D10GetDevices(&interopDeviceCount, interopDevices, 32,
 | 
			
		||||
                                  g_pd3dDevice, cudaD3D10DeviceListAll);
 | 
			
		||||
      printLastCudaError("cudaD3D10GetDevices failed");  // This prints and
 | 
			
		||||
                                                         // resets the cudaError
 | 
			
		||||
                                                         // to cudaSuccess
 | 
			
		||||
      AssertOrQuit(cudaSuccess == error);
 | 
			
		||||
 | 
			
		||||
      g_ContextCount = interopDeviceCount;
 | 
			
		||||
 | 
			
		||||
      for (UINT i = 0; i < interopDeviceCount; ++i) {
 | 
			
		||||
        g_ContextData[i].index = i;
 | 
			
		||||
        g_ContextData[i].deviceOrdinal = interopDevices[i];
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // Initialize g_ContextCount interop contexts on the device,
 | 
			
		||||
    // striping across AFR groups
 | 
			
		||||
    for (UINT i = 0; i < g_ContextCount; ++i) {
 | 
			
		||||
      printf("Creating context %d on device %d\n", g_ContextData[i].index,
 | 
			
		||||
             g_ContextData[i].deviceOrdinal);
 | 
			
		||||
 | 
			
		||||
      // create a context
 | 
			
		||||
      error = cudaD3D10SetDirect3DDevice(g_pd3dDevice,
 | 
			
		||||
                                         g_ContextData[i].deviceOrdinal);
 | 
			
		||||
      AssertOrQuit(cudaSuccess == error);
 | 
			
		||||
      error = cudaFree(0);
 | 
			
		||||
      AssertOrQuit(cudaSuccess == error);
 | 
			
		||||
 | 
			
		||||
      // allocate a buffer
 | 
			
		||||
      // error = cudaMalloc((void**)&g_ContextData[i].buffer, BYTES_PER_PIXEL);
 | 
			
		||||
      cudaMallocPitch(&g_ContextData[i].cudaLinearMemory, &g_texture_2d.pitch,
 | 
			
		||||
                      g_texture_2d.width * sizeof(float) * 4,
 | 
			
		||||
                      g_texture_2d.height);
 | 
			
		||||
      getLastCudaError("cudaMallocPitch (g_texture_2d) failed");
 | 
			
		||||
      cudaMemset(g_ContextData[i].cudaLinearMemory, 1,
 | 
			
		||||
                 g_texture_2d.pitch * g_texture_2d.height);
 | 
			
		||||
 | 
			
		||||
      AssertOrQuit(cudaSuccess == error);
 | 
			
		||||
 | 
			
		||||
      // pop the context
 | 
			
		||||
      result = cuCtxPopCurrent(&g_ContextData[i].context);
 | 
			
		||||
      AssertOrQuit(CUDA_SUCCESS == result);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // Register the texture with all contexts
 | 
			
		||||
    for (UINT i = 0; i < g_ContextCount; ++i) {
 | 
			
		||||
      printf("Registering texture with context %d\n", i);
 | 
			
		||||
      result = cuCtxPushCurrent(g_ContextData[i].context);
 | 
			
		||||
      AssertOrQuit(CUDA_SUCCESS == result);
 | 
			
		||||
      {
 | 
			
		||||
        // Register the resource
 | 
			
		||||
        error = cudaGraphicsD3D10RegisterResource(
 | 
			
		||||
            &g_ContextData[i].cudaResource, g_texture_2d.pTexture,
 | 
			
		||||
            cudaGraphicsRegisterFlagsNone);
 | 
			
		||||
        getLastCudaError(
 | 
			
		||||
            "cudaGraphicsD3D10RegisterResource (g_texture_2d) failed");
 | 
			
		||||
 | 
			
		||||
        error = cudaGraphicsResourceSetMapFlags(g_ContextData[i].cudaResource,
 | 
			
		||||
                                                cudaD3D10MapFlagsWriteDiscard);
 | 
			
		||||
        getLastCudaError(
 | 
			
		||||
            "cudaGraphicsResourceSetMapFlags (g_texture_2d) failed");
 | 
			
		||||
 | 
			
		||||
        AssertOrQuit(cudaSuccess == error);
 | 
			
		||||
      }
 | 
			
		||||
      result = cuCtxPopCurrent(&g_ContextData[i].context);
 | 
			
		||||
      AssertOrQuit(CUDA_SUCCESS == result);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  //
 | 
			
		||||
  // the main loop
 | 
			
		||||
  //
 | 
			
		||||
  while (false == g_bDone) {
 | 
			
		||||
    Render();
 | 
			
		||||
 | 
			
		||||
    //
 | 
			
		||||
    // handle I/O
 | 
			
		||||
    //
 | 
			
		||||
    MSG msg;
 | 
			
		||||
    ZeroMemory(&msg, sizeof(msg));
 | 
			
		||||
 | 
			
		||||
    while (msg.message != WM_QUIT) {
 | 
			
		||||
      if (PeekMessage(&msg, NULL, 0U, 0U, PM_REMOVE)) {
 | 
			
		||||
        TranslateMessage(&msg);
 | 
			
		||||
        DispatchMessage(&msg);
 | 
			
		||||
      } else {
 | 
			
		||||
        Render();
 | 
			
		||||
 | 
			
		||||
        if (ref_file) {
 | 
			
		||||
          for (int count = 0; count < g_iFrameToCompare; count++) {
 | 
			
		||||
            Render();
 | 
			
		||||
          }
 | 
			
		||||
 | 
			
		||||
          const char *cur_image_path = "SLID3D10Texture.ppm";
 | 
			
		||||
 | 
			
		||||
          // Save a reference of our current test run image
 | 
			
		||||
          CheckRenderD3D10::ActiveRenderTargetToPPM(g_pd3dDevice,
 | 
			
		||||
                                                    cur_image_path);
 | 
			
		||||
 | 
			
		||||
          // compare to offical reference image, printing PASS or FAIL.
 | 
			
		||||
          g_bPassed = CheckRenderD3D10::PPMvsPPM(cur_image_path, ref_file,
 | 
			
		||||
                                                 argv[0], MAX_EPSILON, 0.15f);
 | 
			
		||||
 | 
			
		||||
          g_bDone = true;
 | 
			
		||||
 | 
			
		||||
          Cleanup();
 | 
			
		||||
 | 
			
		||||
          PostQuitMessage(0);
 | 
			
		||||
        } else {
 | 
			
		||||
          g_bPassed = true;
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  // Unregister windows class
 | 
			
		||||
  UnregisterClass(wc.lpszClassName, wc.hInstance);
 | 
			
		||||
 | 
			
		||||
  //
 | 
			
		||||
  // and exit
 | 
			
		||||
  //
 | 
			
		||||
  printf("> %s running on %s exiting...\n", SDK_name, device_name);
 | 
			
		||||
 | 
			
		||||
  printf(g_bPassed ? "Test images compared OK\n"
 | 
			
		||||
                   : "Test images are Different!\n");
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
// Name: InitD3D()
 | 
			
		||||
// Desc: Initializes Direct3D
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
HRESULT InitD3D(HWND hWnd) {
  // Describe the swap chain: one RGBA8 back buffer sized to the window,
  // no multisampling, presented in windowed mode to hWnd.
  DXGI_SWAP_CHAIN_DESC sd;
  ZeroMemory(&sd, sizeof(sd));
  sd.BufferCount = 1;
  sd.BufferDesc.Width = g_WindowWidth;
  sd.BufferDesc.Height = g_WindowHeight;
  sd.BufferDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
  sd.BufferDesc.RefreshRate.Numerator = 60;
  sd.BufferDesc.RefreshRate.Denominator = 1;
  sd.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT;
  sd.OutputWindow = hWnd;
  sd.SampleDesc.Count = 1;
  sd.SampleDesc.Quality = 0;
  sd.Windowed = TRUE;

  // Create the device and swap chain on the adapter selected earlier.
  HRESULT hr = sFnPtr_D3D10CreateDeviceAndSwapChain(
      g_pCudaCapableAdapter, D3D10_DRIVER_TYPE_HARDWARE, NULL, 0,
      D3D10_SDK_VERSION, &sd, &g_pSwapChain, &g_pd3dDevice);
  AssertOrQuit(SUCCEEDED(hr));
  // This function drops its reference to the adapter once the device exists.
  g_pCudaCapableAdapter->Release();

  // Create a render target view of the swap chain's back buffer.
  ID3D10Texture2D *pBuffer = NULL;
  hr =
      g_pSwapChain->GetBuffer(0, __uuidof(ID3D10Texture2D), (LPVOID *)&pBuffer);
  AssertOrQuit(SUCCEEDED(hr));

  hr = g_pd3dDevice->CreateRenderTargetView(pBuffer, NULL, &g_pSwapChainRTV);
  AssertOrQuit(SUCCEEDED(hr));
  // The view holds the buffer from here on; release the local reference.
  pBuffer->Release();

  g_pd3dDevice->OMSetRenderTargets(1, &g_pSwapChainRTV, NULL);

  // Set up a viewport covering the whole window.
  D3D10_VIEWPORT vp;
  vp.Width = g_WindowWidth;
  vp.Height = g_WindowHeight;
  vp.MinDepth = 0.0f;
  vp.MaxDepth = 1.0f;
  vp.TopLeftX = 0;
  vp.TopLeftY = 0;
  g_pd3dDevice->RSSetViewports(1, &vp);

  // Compile the in-memory effect source and look up the handles used later.
  {
    // Both blobs start as NULL so a failed compile never leaves a dangling
    // pointer behind.
    ID3D10Blob *pCompiledEffect = NULL;
    ID3D10Blob *pErrors = NULL;
    hr = sFnPtr_D3D10CompileEffectFromMemory((void *)g_simpleEffectSrc,
                                             sizeof(g_simpleEffectSrc), NULL,
                                             NULL,  // pDefines
                                             NULL,  // pIncludes
                                             0,     // HLSL flags
                                             0,     // FXFlags
                                             &pCompiledEffect, &pErrors);

    if (pErrors) {
      // Print the compiler diagnostics, then release the blob so it is not
      // leaked.
      fprintf(stdout, "Compilation error: \n %s",
              (char *)pErrors->GetBufferPointer());
      pErrors->Release();
      pErrors = NULL;
    }

    AssertOrQuit(SUCCEEDED(hr));

    hr = sFnPtr_D3D10CreateEffectFromMemory(
        pCompiledEffect->GetBufferPointer(), pCompiledEffect->GetBufferSize(),
        0,  // FXFlags
        g_pd3dDevice, NULL, &g_pSimpleEffect);
    pCompiledEffect->Release();
    // g_pSimpleEffect is dereferenced immediately below, so stop here if the
    // effect could not be created.
    AssertOrQuit(SUCCEEDED(hr));

    g_pSimpleTechnique = g_pSimpleEffect->GetTechniqueByName("Render");

    g_pvQuadRect =
        g_pSimpleEffect->GetVariableByName("g_vQuadRect")->AsVector();

    g_pTexture2D =
        g_pSimpleEffect->GetVariableByName("g_Texture2D")->AsShaderResource();

    // No input layout is bound; geometry is drawn as a triangle strip.
    g_pd3dDevice->IASetInputLayout(0);
    g_pd3dDevice->IASetPrimitiveTopology(
        D3D10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
  }

  // Rasterizer state: solid fill, front-face culling, no depth bias, and no
  // depth clipping, scissoring, multisampling or line antialiasing.
  D3D10_RASTERIZER_DESC rasterizerState;
  rasterizerState.FillMode = D3D10_FILL_SOLID;
  rasterizerState.CullMode = D3D10_CULL_FRONT;
  rasterizerState.FrontCounterClockwise = false;
  rasterizerState.DepthBias = 0;  // integer field, not a boolean
  rasterizerState.DepthBiasClamp = 0;
  rasterizerState.SlopeScaledDepthBias = 0;
  rasterizerState.DepthClipEnable = false;
  rasterizerState.ScissorEnable = false;
  rasterizerState.MultisampleEnable = false;
  rasterizerState.AntialiasedLineEnable = false;
  hr = g_pd3dDevice->CreateRasterizerState(&rasterizerState, &g_pRasterState);
  AssertOrQuit(SUCCEEDED(hr));
  g_pd3dDevice->RSSetState(g_pRasterState);

  return S_OK;
}
 | 
			
		||||
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
// Name: InitTextures()
 | 
			
		||||
// Desc: Initializes Direct3D Textures (allocation and initialization)
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
HRESULT InitTextures() {
  // The sample uses a single 768x768 2D texture.
  g_texture_2d.width = 768;
  g_texture_2d.height = 768;

  // One mip level, one array slice, four 32-bit float channels per texel,
  // default usage, bindable as a shader resource. All other fields stay zero.
  D3D10_TEXTURE2D_DESC texDesc;
  ZeroMemory(&texDesc, sizeof(D3D10_TEXTURE2D_DESC));
  texDesc.Width = g_texture_2d.width;
  texDesc.Height = g_texture_2d.height;
  texDesc.MipLevels = 1;
  texDesc.ArraySize = 1;
  texDesc.Format = DXGI_FORMAT_R32G32B32A32_FLOAT;
  texDesc.SampleDesc.Count = 1;
  texDesc.Usage = D3D10_USAGE_DEFAULT;
  texDesc.BindFlags = D3D10_BIND_SHADER_RESOURCE;

  // Allocate the texture itself.
  HRESULT hr =
      g_pd3dDevice->CreateTexture2D(&texDesc, NULL, &g_texture_2d.pTexture);
  if (FAILED(hr)) {
    return E_FAIL;
  }

  // Create the shader resource view through which the effect samples it.
  hr = g_pd3dDevice->CreateShaderResourceView(g_texture_2d.pTexture, NULL,
                                              &g_texture_2d.pSRView);
  if (FAILED(hr)) {
    return E_FAIL;
  }

  // Bind the view to the effect's texture variable.
  g_pTexture2D->SetResource(g_texture_2d.pSRView);

  return S_OK;
}
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
//! Run the Cuda part of the computation
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
int RunKernels(CudaContextData *currentContextData) {
  // Animation parameter; advanced by 0.1 on every call.
  static float t = 0.0f;

  // When g_bQAReadback is set, a fixed parameter is used instead of the
  // running one.
  const float kernelTime = g_bQAReadback ? 0.2f : t;

  // Fetch the CUDA array backing sub-resource (0, 0) of the mapped texture.
  cudaArray *mappedArray;
  cudaGraphicsSubResourceGetMappedArray(&mappedArray,
                                        currentContextData->cudaResource, 0, 0);
  getLastCudaError(
      "cudaGraphicsSubResourceGetMappedArray (cuda_texture_2d) failed");

  // Launch the kernel; it writes into the linear staging buffer.
  cuda_texture_2d(currentContextData->cudaLinearMemory, g_texture_2d.width,
                  g_texture_2d.height, g_texture_2d.pitch, kernelTime);
  getLastCudaError("cuda_texture_2d failed");

  // Copy the staging buffer into the D3D texture through its mapped array.
  // Each row holds `width` texels of four floats.
  cudaMemcpy2DToArray(mappedArray,                              // dst array
                      0, 0,                                     // dst offset
                      currentContextData->cudaLinearMemory,     // src
                      g_texture_2d.pitch,                       // src pitch
                      g_texture_2d.width * 4 * sizeof(float),   // row bytes
                      g_texture_2d.height,                      // rows
                      cudaMemcpyDeviceToDevice);                // kind
  getLastCudaError("cudaMemcpy2DToArray failed");

  t += 0.1f;

  return 0;
}
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
//! Draw the final result on the screen
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
void DrawScene() {
  // Clear the back buffer to an opaque grey-blue.
  float clearRGBA[4] = {0.5f, 0.5f, 0.6f, 1.0f};
  g_pd3dDevice->ClearRenderTargetView(g_pSwapChainRTV, clearRGBA);

  // Pass the quad rectangle to the effect, apply pass 0 of the technique and
  // draw four vertices for the textured quad.
  float quadXYWH[4] = {-0.98f, -0.98f, 1.96f, 1.96f};
  g_pvQuadRect->SetFloatVector(quadXYWH);
  g_pSimpleTechnique->GetPassByIndex(0)->Apply(0);
  g_pd3dDevice->Draw(4, 0);

  // Present the back buffer contents to the display.
  g_pSwapChain->Present(0, 0);
}
 | 
			
		||||
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
// Name: Cleanup()
 | 
			
		||||
// Desc: Releases all previously initialized objects
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
int Cleanup() {
 | 
			
		||||
  // unregister the Cuda resources
 | 
			
		||||
  CUresult result = CUDA_SUCCESS;
 | 
			
		||||
  cudaError_t error = cudaSuccess;
 | 
			
		||||
 | 
			
		||||
  // Drop the D3D resources' refcounts
 | 
			
		||||
  // Unregister the texture with all contexts
 | 
			
		||||
  for (UINT i = 0; i < g_ContextCount; ++i) {
 | 
			
		||||
    printf("Unregistering texture with context %d\n", i);
 | 
			
		||||
    result = cuCtxPushCurrent(g_ContextData[i].context);
 | 
			
		||||
    AssertOrQuit(CUDA_SUCCESS == result);
 | 
			
		||||
    {
 | 
			
		||||
      // Register the resource
 | 
			
		||||
      error = cudaGraphicsUnregisterResource(g_ContextData[i].cudaResource);
 | 
			
		||||
      AssertOrQuit(cudaSuccess == error);
 | 
			
		||||
    }
 | 
			
		||||
    result = cuCtxPopCurrent(&g_ContextData[i].context);
 | 
			
		||||
    AssertOrQuit(CUDA_SUCCESS == result);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Destroy all contexts
 | 
			
		||||
  for (UINT i = 0; i < g_ContextCount; ++i) {
 | 
			
		||||
    printf("Destroying context %d\n", i);
 | 
			
		||||
    result = cuCtxPushCurrent(g_ContextData[i].context);
 | 
			
		||||
    AssertOrQuit(CUDA_SUCCESS == result);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  //
 | 
			
		||||
  // clean up Direct3D
 | 
			
		||||
  //
 | 
			
		||||
  {
 | 
			
		||||
    // release the resources we created
 | 
			
		||||
    g_texture_2d.pSRView->Release();
 | 
			
		||||
    g_texture_2d.pTexture->Release();
 | 
			
		||||
 | 
			
		||||
    if (g_pInputLayout != NULL) g_pInputLayout->Release();
 | 
			
		||||
 | 
			
		||||
    if (g_pSimpleEffect != NULL) g_pSimpleEffect->Release();
 | 
			
		||||
 | 
			
		||||
    if (g_pSwapChainRTV != NULL) g_pSwapChainRTV->Release();
 | 
			
		||||
 | 
			
		||||
    if (g_pSwapChain != NULL) g_pSwapChain->Release();
 | 
			
		||||
 | 
			
		||||
    if (g_pd3dDevice != NULL) g_pd3dDevice->Release();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
// Name: Render()
 | 
			
		||||
// Desc: Launches the CUDA kernels to fill in the texture data
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
int Render() {
 | 
			
		||||
  //
 | 
			
		||||
  // map the resources we've registered so we can access them in Cuda
 | 
			
		||||
  // - it is most efficient to map and unmap all resources in a single call,
 | 
			
		||||
  //   and to have the map/unmap calls be the boundary between using the GPU
 | 
			
		||||
  //   for Direct3D and Cuda
 | 
			
		||||
  //
 | 
			
		||||
  {
 | 
			
		||||
    cudaStream_t stream = 0;
 | 
			
		||||
 | 
			
		||||
    cudaError_t error = cudaSuccess;
 | 
			
		||||
    CudaContextData *currentContextData = NULL;
 | 
			
		||||
 | 
			
		||||
    // get the current device ordinal
 | 
			
		||||
    static int currentDevice = -1;
 | 
			
		||||
    error = cudaD3D10GetDevices(NULL, ¤tDevice, 1, g_pd3dDevice,
 | 
			
		||||
                                cudaD3D10DeviceListCurrentFrame);
 | 
			
		||||
    printLastCudaError("cudaD3D10GetDevices failed");  // This prints and resets
 | 
			
		||||
                                                       // the cudaError to
 | 
			
		||||
                                                       // cudaSuccess
 | 
			
		||||
    AssertOrQuit(cudaSuccess == error);
 | 
			
		||||
 | 
			
		||||
    static int nextDevice = -1;
 | 
			
		||||
    // assert that querying the next device in AFR isn't broken
 | 
			
		||||
    AssertOrQuit(nextDevice == -1 || nextDevice == currentDevice);
 | 
			
		||||
    error = cudaD3D10GetDevices(NULL, &nextDevice, 1, g_pd3dDevice,
 | 
			
		||||
                                cudaD3D10DeviceListNextFrame);
 | 
			
		||||
    printLastCudaError("cudaD3D10GetDevices failed");  // This prints and resets
 | 
			
		||||
                                                       // the cudaError to
 | 
			
		||||
                                                       // cudaSuccess
 | 
			
		||||
    AssertOrQuit(cudaSuccess == error);
 | 
			
		||||
 | 
			
		||||
    // choose context data corresponding to the current device ordinal
 | 
			
		||||
    for (UINT i = 0; i < g_ContextCount; ++i) {
 | 
			
		||||
      if (currentDevice == g_ContextData[i].deviceOrdinal) {
 | 
			
		||||
        currentContextData = &g_ContextData[i];
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    AssertOrQuit(currentContextData);
 | 
			
		||||
 | 
			
		||||
    CUresult result;
 | 
			
		||||
    result = cuCtxPushCurrent(currentContextData->context);
 | 
			
		||||
    AssertOrQuit(CUDA_SUCCESS == result);
 | 
			
		||||
 | 
			
		||||
    cudaGraphicsMapResources(1, ¤tContextData->cudaResource, stream);
 | 
			
		||||
    getLastCudaError("cudaGraphicsMapResources(3) failed");
 | 
			
		||||
 | 
			
		||||
    //
 | 
			
		||||
    // run kernels which will populate the contents of those textures
 | 
			
		||||
    //
 | 
			
		||||
    RunKernels(currentContextData);
 | 
			
		||||
 | 
			
		||||
    //
 | 
			
		||||
    // unmap the resources
 | 
			
		||||
    //
 | 
			
		||||
    cudaGraphicsUnmapResources(1, ¤tContextData->cudaResource, stream);
 | 
			
		||||
    getLastCudaError("cudaGraphicsUnmapResources(3) failed");
 | 
			
		||||
 | 
			
		||||
    CUcontext poppedContext;
 | 
			
		||||
    result = cuCtxPopCurrent(&poppedContext);
 | 
			
		||||
    AssertOrQuit(CUDA_SUCCESS == result);
 | 
			
		||||
    AssertOrQuit(poppedContext == currentContextData->context);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  //
 | 
			
		||||
  // draw the scene using them
 | 
			
		||||
  //
 | 
			
		||||
  DrawScene();
 | 
			
		||||
 | 
			
		||||
  return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
// Name: MsgProc()
 | 
			
		||||
// Desc: The window's message handler
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
static LRESULT WINAPI MsgProc(HWND hWnd, UINT msg, WPARAM wParam,
                              LPARAM lParam) {
  // Pressing ESC and destroying the window both shut the application down:
  // flag completion, release resources and post the quit message.
  const bool escapePressed = (msg == WM_KEYDOWN) && (wParam == VK_ESCAPE);

  if (escapePressed || msg == WM_DESTROY) {
    g_bDone = true;
    Cleanup();
    PostQuitMessage(0);
    return 0;
  }

  // Paint requests are only validated here.
  if (msg == WM_PAINT) {
    ValidateRect(hWnd, NULL);
    return 0;
  }

  // Everything else, including other key presses, gets default handling.
  return DefWindowProc(hWnd, msg, wParam, lParam);
}
 | 
			
		||||
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							@ -1,88 +0,0 @@
 | 
			
		||||
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
 | 
			
		||||
 *
 | 
			
		||||
 * Redistribution and use in source and binary forms, with or without
 | 
			
		||||
 * modification, are permitted provided that the following conditions
 | 
			
		||||
 * are met:
 | 
			
		||||
 *  * Redistributions of source code must retain the above copyright
 | 
			
		||||
 *    notice, this list of conditions and the following disclaimer.
 | 
			
		||||
 *  * Redistributions in binary form must reproduce the above copyright
 | 
			
		||||
 *    notice, this list of conditions and the following disclaimer in the
 | 
			
		||||
 *    documentation and/or other materials provided with the distribution.
 | 
			
		||||
 *  * Neither the name of NVIDIA CORPORATION nor the names of its
 | 
			
		||||
 *    contributors may be used to endorse or promote products derived
 | 
			
		||||
 *    from this software without specific prior written permission.
 | 
			
		||||
 *
 | 
			
		||||
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 | 
			
		||||
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 | 
			
		||||
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 | 
			
		||||
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 | 
			
		||||
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 | 
			
		||||
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 | 
			
		||||
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 | 
			
		||||
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 | 
			
		||||
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 | 
			
		||||
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 | 
			
		||||
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
#include <stdio.h>
 | 
			
		||||
#include <stdlib.h>
 | 
			
		||||
#include <string.h>
 | 
			
		||||
 | 
			
		||||
//
 | 
			
		||||
// Paint a 2D texture with a moving red/green hatch pattern on a
 | 
			
		||||
// strobing blue background.  Note that this kernel reads from and
// writes to the texture, hence why this texture was not mapped
 | 
			
		||||
// as WriteDiscard.
 | 
			
		||||
//
 | 
			
		||||
// Fills one RGBA float texel per thread.
//
// Launch layout: a 2D grid of 2D blocks; thread (x, y) handles pixel (x, y).
// Threads outside width x height return immediately.
//
//   surface  base address of the destination buffer
//   width    image width in pixels
//   height   image height in pixels
//   pitch    distance in bytes between the starts of consecutive rows
//   t        animation parameter that shifts the pattern and blue level
__global__ void cuda_kernel_texture_2d(unsigned char *surface, int width,
                                       int height, size_t pitch, float t) {
  const int x = blockIdx.x * blockDim.x + threadIdx.x;
  const int y = blockIdx.y * blockDim.y + threadIdx.y;

  // in the case where, due to quantization into grids, we have
  // more threads than pixels, skip the threads which don't
  // correspond to valid pixels
  if (x >= width || y >= height) return;

  // get a pointer to the pixel at (x,y): rows are `pitch` bytes apart and
  // each pixel is four consecutive floats
  float *pixel = (float *)(surface + y * pitch) + 4 * x;

  // populate it; single-precision literals and functions keep the whole
  // computation in float
  const float value_x =
      0.5f + 0.5f * cosf(t + 10.0f * ((2.0f * x) / width - 1.0f));
  const float value_y =
      0.5f + 0.5f * cosf(t + 10.0f * ((2.0f * y) / height - 1.0f));
  pixel[0] = value_x > 0.5f ? 1.0f : 0.0f;
  pixel[1] = value_y > 0.5f ? 1.0f : 0.0f;
  pixel[2] = 0.5f + 0.5f * cosf(t);
  pixel[3] = 1.0f;  // alpha

  // Take six successive square roots of every channel in place...
  for (int i = 0; i < 6; ++i) {
    for (int j = 0; j < 4; ++j) {
      pixel[j] = sqrtf(pixel[j]);
    }
  }

  // ...then square every channel six times, again reading and writing the
  // surface on each step.
  for (int i = 0; i < 6; ++i) {
    for (int j = 0; j < 4; ++j) {
      pixel[j] *= pixel[j];
    }
  }
}
 | 
			
		||||
 | 
			
		||||
// Host-side launcher for cuda_kernel_texture_2d.
//
// Covers a width x height image with 16x16 thread blocks, launches the
// kernel, and prints a message if the launch reports an error.
extern "C" void cuda_texture_2d(void *surface, int width, int height,
                                size_t pitch, float t) {
  // Fixed block shape: 16 x 16 = 256 threads.
  const dim3 block(16, 16);

  // Round the grid up so partial tiles at the right/bottom edges are covered.
  const dim3 grid((width + block.x - 1) / block.x,
                  (height + block.y - 1) / block.y);

  cuda_kernel_texture_2d<<<grid, block>>>(
      static_cast<unsigned char *>(surface), width, height, pitch, t);

  const cudaError_t launchStatus = cudaGetLastError();
  if (launchStatus != cudaSuccess) {
    printf("cuda_kernel_texture_2d() failed to launch error = %d\n",
           launchStatus);
  }
}
 | 
			
		||||
@ -1,46 +0,0 @@
 | 
			
		||||
################################################################################
 | 
			
		||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
 | 
			
		||||
#
 | 
			
		||||
# Redistribution and use in source and binary forms, with or without
 | 
			
		||||
# modification, are permitted provided that the following conditions
 | 
			
		||||
# are met:
 | 
			
		||||
#  * Redistributions of source code must retain the above copyright
 | 
			
		||||
#    notice, this list of conditions and the following disclaimer.
 | 
			
		||||
#  * Redistributions in binary form must reproduce the above copyright
 | 
			
		||||
#    notice, this list of conditions and the following disclaimer in the
 | 
			
		||||
#    documentation and/or other materials provided with the distribution.
 | 
			
		||||
#  * Neither the name of NVIDIA CORPORATION nor the names of its
 | 
			
		||||
#    contributors may be used to endorse or promote products derived
 | 
			
		||||
#    from this software without specific prior written permission.
 | 
			
		||||
#
 | 
			
		||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 | 
			
		||||
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 | 
			
		||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 | 
			
		||||
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 | 
			
		||||
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 | 
			
		||||
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 | 
			
		||||
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 | 
			
		||||
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 | 
			
		||||
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 | 
			
		||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 | 
			
		||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
			
		||||
#
 | 
			
		||||
################################################################################
 | 
			
		||||
#
 | 
			
		||||
# Makefile project only supported on Mac OS X and Linux platforms
 | 
			
		||||
#
 | 
			
		||||
################################################################################
 | 
			
		||||
 | 
			
		||||
# Target rules
 | 
			
		||||
all: build

# None of these targets produces a file; declaring them phony keeps them
# working even if a file with the same name exists in the directory.
.PHONY: all build run testrun clean clobber

# The sample is waived on this platform: building only prints a notice.
build:
	$(info >>> WARNING - VFlockingD3D10 is not supported on Linux - waiving sample <<<)

run: build

testrun: build

clean:

clobber: clean
 | 
			
		||||
@ -1,49 +0,0 @@
 | 
			
		||||
# VFlockingD3D10 - VFlockingD3D10
 | 
			
		||||
 | 
			
		||||
## Description
 | 
			
		||||
 | 
			
		||||
The sample models formation of V-shaped flocks by big birds, such as geese and cranes. The algorithms of such flocking are borrowed from the paper "V-like formations in flocks of artificial birds" from Artificial Life, Vol. 14, No. 2, 2008. The sample has CPU- and GPU-based implementations. Press 'g' to toggle between them. The GPU-based simulation works many times faster than the CPU-based one. The printout in the console window reports the simulation time per step. Press 'r' to reset the initial distribution of birds.
 | 
			
		||||
 | 
			
		||||
## Key Concepts
 | 
			
		||||
 | 
			
		||||
Graphics Interop, Data Parallel Algorithms, Physically-Based Simulation, Performance Strategies
 | 
			
		||||
 | 
			
		||||
## Supported SM Architectures
 | 
			
		||||
 | 
			
		||||
[SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.9 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 | 
			
		||||
 | 
			
		||||
## Supported OSes
 | 
			
		||||
 | 
			
		||||
Windows
 | 
			
		||||
 | 
			
		||||
## Supported CPU Architecture
 | 
			
		||||
 | 
			
		||||
x86_64
 | 
			
		||||
 | 
			
		||||
## CUDA APIs involved
 | 
			
		||||
 | 
			
		||||
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
 | 
			
		||||
cudaGraphicsUnmapResources, cudaMemcpy, cudaFree, cudaGetErrorString, cudaGraphicsResourceSetMapFlags, cudaGraphicsResourceGetMappedPointer, cudaGetLastError, cudaGraphicsMapResources, cudaEventSynchronize, cudaGetDeviceProperties, cudaDeviceSynchronize, cudaEventRecord, cudaMemset, cudaGraphicsUnregisterResource, cudaMalloc, cudaEventElapsedTime, cudaGetDeviceCount, cudaEventCreate
 | 
			
		||||
 | 
			
		||||
## Dependencies needed to build/run
 | 
			
		||||
[DirectX](../../../README.md#directx)
 | 
			
		||||
 | 
			
		||||
## Prerequisites
 | 
			
		||||
 | 
			
		||||
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 | 
			
		||||
Make sure the dependencies mentioned in the [Dependencies](#dependencies-needed-to-buildrun) section above are installed.
 | 
			
		||||
 | 
			
		||||
## Build and Run
 | 
			
		||||
 | 
			
		||||
### Windows
 | 
			
		||||
The Windows samples are built using the Visual Studio IDE. Solution files (.sln) are provided for each supported version of Visual Studio, using the format:
 | 
			
		||||
```
 | 
			
		||||
*_vs<version>.sln - for Visual Studio <version>
 | 
			
		||||
```
 | 
			
		||||
Each individual sample has its own set of solution files in its directory:
 | 
			
		||||
 | 
			
		||||
To build/examine all the samples at once, the complete solution files should be used. To build/examine a single sample, the individual sample solution files should be used.
 | 
			
		||||
> **Note:** Some samples require that the Microsoft DirectX SDK (June 2010 or newer) be installed and that the VC++ directory paths are properly set up (**Tools > Options...**). Check the DirectX Dependencies section for details.
 | 
			
		||||
 | 
			
		||||
## References (for more details)
 | 
			
		||||
 | 
			
		||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							@ -1,45 +0,0 @@
 | 
			
		||||
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
 | 
			
		||||
 *
 | 
			
		||||
 * Redistribution and use in source and binary forms, with or without
 | 
			
		||||
 * modification, are permitted provided that the following conditions
 | 
			
		||||
 * are met:
 | 
			
		||||
 *  * Redistributions of source code must retain the above copyright
 | 
			
		||||
 *    notice, this list of conditions and the following disclaimer.
 | 
			
		||||
 *  * Redistributions in binary form must reproduce the above copyright
 | 
			
		||||
 *    notice, this list of conditions and the following disclaimer in the
 | 
			
		||||
 *    documentation and/or other materials provided with the distribution.
 | 
			
		||||
 *  * Neither the name of NVIDIA CORPORATION nor the names of its
 | 
			
		||||
 *    contributors may be used to endorse or promote products derived
 | 
			
		||||
 *    from this software without specific prior written permission.
 | 
			
		||||
 *
 | 
			
		||||
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 | 
			
		||||
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 | 
			
		||||
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 | 
			
		||||
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 | 
			
		||||
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 | 
			
		||||
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 | 
			
		||||
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 | 
			
		||||
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 | 
			
		||||
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 | 
			
		||||
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 | 
			
		||||
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
#ifndef VFLOCKING_D3D10_H
 | 
			
		||||
#define VFLOCKING_D3D10_H
 | 
			
		||||
 | 
			
		||||
#pragma once
 | 
			
		||||
 | 
			
		||||
// simulation parameters
 | 
			
		||||
// Simulation parameters, stored as eight consecutive floats.
// NOTE(review): the meaning of each field is not visible in this header;
// confirm against the simulation code that reads them.
struct Params {
  float alpha;
  float upwashX, upwashY;
  float wingspan;
  float dX, dY;
  float epsilon;
  float lambda;  // original note: -0.1073f * wingspan
};
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
@ -1,288 +0,0 @@
 | 
			
		||||
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
 | 
			
		||||
 *
 | 
			
		||||
 * Redistribution and use in source and binary forms, with or without
 | 
			
		||||
 * modification, are permitted provided that the following conditions
 | 
			
		||||
 * are met:
 | 
			
		||||
 *  * Redistributions of source code must retain the above copyright
 | 
			
		||||
 *    notice, this list of conditions and the following disclaimer.
 | 
			
		||||
 *  * Redistributions in binary form must reproduce the above copyright
 | 
			
		||||
 *    notice, this list of conditions and the following disclaimer in the
 | 
			
		||||
 *    documentation and/or other materials provided with the distribution.
 | 
			
		||||
 *  * Neither the name of NVIDIA CORPORATION nor the names of its
 | 
			
		||||
 *    contributors may be used to endorse or promote products derived
 | 
			
		||||
 *    from this software without specific prior written permission.
 | 
			
		||||
 *
 | 
			
		||||
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 | 
			
		||||
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 | 
			
		||||
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 | 
			
		||||
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 | 
			
		||||
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 | 
			
		||||
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 | 
			
		||||
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 | 
			
		||||
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 | 
			
		||||
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 | 
			
		||||
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 | 
			
		||||
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
#include <stdio.h>
 | 
			
		||||
#include <stdlib.h>
 | 
			
		||||
#include <string.h>
 | 
			
		||||
 | 
			
		||||
#include <helper_cuda.h>
 | 
			
		||||
#include <helper_math.h>
 | 
			
		||||
#include <VFlockingD3D10.h>
 | 
			
		||||
 | 
			
		||||
#define PI 3.1415926536f
 | 
			
		||||
 | 
			
		||||
typedef unsigned int uint;
 | 
			
		||||
 | 
			
		||||
// Tests whether pos0 lies strictly inside the axis-aligned rectangle of size
// width x height centred on pos1 (points exactly on the border are outside).
__device__ bool isInsideQuad_D(float2 pos0, float2 pos1, float width,
                               float height) {
  const float offsetX = fabs(pos0.x - pos1.x);
  const float offsetY = fabs(pos0.y - pos1.y);

  return offsetX < 0.5f * width && offsetY < 0.5f * height;
}
 | 
			
		||||
 | 
			
		||||
// Tests whether `pixel` is covered by a shape centred on `pos` made of the
// union of an axis-aligned width x height rectangle and a disc of the given
// radius. Both tests are strict (border points are outside).
__device__ bool isInsideBird(float2 pixel, float2 pos, float width,
                             float height, float radius) {
  const bool inRect = (abs(pixel.x - pos.x) < 0.5f * width) &&
                      (abs(pixel.y - pos.y) < 0.5f * height);

  // Squared distance compared against squared radius: no sqrt needed.
  const bool inDisc = ((pixel.x - pos.x) * (pixel.x - pos.x) +
                       (pixel.y - pos.y) * (pixel.y - pos.y)) <
                      (radius * radius);

  return inRect || inDisc;
}
 | 
			
		||||
 | 
			
		||||
// Computes the next position of each bird, one thread per bird (1D launch;
// threads with index >= numBirds exit immediately).
//
// The goal/neighbour flags are numBirds x numBirds boolean matrices indexed
// as [bird * numBirds + other].
//   * A bird without a proxy moves dX along the direction towards the first
//     bird flagged in its leftgoals row (or stays put if none is flagged).
//   * A bird with a proxy steps +/-dX along x towards the nearest left/right
//     goal offset, and steps back by dY when a flagged neighbour is at most
//     epsilon ahead of it in y.
//
// NOTE: when a proxied bird is already within dX of its nearest goal the
// kernel returns early and newPos for that bird is NOT written (any pending
// y step is dropped as well).
__global__ void cuda_kernel_update(float2 *newPos, float2 *curPos,
                                   uint numBirds, bool *hasproxy,
                                   bool *neighbors, bool *rightgoals,
                                   bool *leftgoals, Params *params) {
  const uint self = blockIdx.x * blockDim.x + threadIdx.x;

  if (self >= numBirds) {
    return;
  }

  const uint row = self * numBirds;  // start of this bird's flag row
  float2 step = make_float2(0.f);

  if (hasproxy[self]) {
    float nearest = 50000.f;  // signed x-distance to the closest goal so far
    bool collided = false;

    for (uint other = 0; other < numBirds; other++) {
      float gap;

      if (leftgoals[row + other]) {
        gap = curPos[other].x - (params->wingspan + params->lambda) -
              curPos[self].x;

        if (fabs(gap) < fabs(nearest)) {
          nearest = gap;
        }
      }

      if (rightgoals[row + other]) {
        gap = curPos[other].x + (params->wingspan + params->lambda) -
              curPos[self].x;

        if (fabs(gap) < fabs(nearest)) {
          nearest = gap;
        }
      }

      // Only the first neighbour found inside the epsilon window matters.
      if (neighbors[row + other] && !collided) {
        const bool aheadWithinEps =
            curPos[other].y >= curPos[self].y &&
            curPos[other].y < curPos[self].y + params->epsilon;

        if (aheadWithinEps) {
          step.y = -params->dY;
          collided = true;
        }
      }
    }

    // Close enough to the goal: leave newPos[self] untouched.
    if (fabs(nearest) <= params->dX) {
      return;
    }

    step.x = nearest > 0 ? params->dX : -params->dX;
  } else {
    for (uint other = 0; other < numBirds; other++) {
      if (other == self) {
        continue;
      }

      if (leftgoals[row + other]) {
        step = params->dX * normalize(curPos[other] - curPos[self]);
        break;
      }
    }
  }

  newPos[self].x = curPos[self].x + step.x;
  newPos[self].y = curPos[self].y + step.y;
}
 | 
			
		||||
 | 
			
		||||
// Prunes goal flags using triples of birds, one thread per triple (1D launch;
// threads beyond numBirds*(numBirds-1)*(numBirds-2)/6 exit immediately).
//
// The three bird indices of a triple are ordered by ascending y, so a[0] is
// the bird with the smallest y. Only flags in a[0]'s row of the
// numBirds x numBirds leftgoals/rightgoals matrices are cleared.
// `neighbors` is accepted for signature symmetry but not used here.
__global__ void cuda_kernel_checktriples(float2 *pos, uint numBirds,
                                         bool *hasproxy, bool *neighbors,
                                         bool *rightgoals, bool *leftgoals,
                                         uint3 *triples, Params *params) {
  const uint tripleIdx = blockIdx.x * blockDim.x + threadIdx.x;

  if (tripleIdx >= numBirds * (numBirds - 1) * (numBirds - 2) / 6) {
    return;
  }

  uint a[3];
  a[0] = triples[tripleIdx].x;
  a[1] = triples[tripleIdx].y;
  a[2] = triples[tripleIdx].z;

  // Bubble sort of the three indices by the y coordinate of their birds.
  for (uint pass = 0; pass < 3; pass++) {
    for (uint k = 2; k > pass; k--) {
      if (pos[a[k - 1]].y > pos[a[k]].y) {
        const uint tmp = a[k - 1];
        a[k - 1] = a[k];
        a[k] = tmp;
      }
    }
  }

  const uint row = a[0] * numBirds;  // flag row of the bird with smallest y

  if (hasproxy[a[0]]) {
    const float spanX = pos[a[2]].x - pos[a[1]].x;

    // The two other birds are too close in x for a[0] to fit between them:
    // drop the goals that point into the gap.
    if (fabs(spanX) < 2.f * (params->wingspan + params->lambda)) {
      const uint dropLeft = (spanX >= 0) ? a[2] : a[1];
      const uint dropRight = (spanX >= 0) ? a[1] : a[2];

      if (leftgoals[row + dropLeft]) {
        leftgoals[row + dropLeft] = false;
      }

      if (rightgoals[row + dropRight]) {
        rightgoals[row + dropRight] = false;
      }
    }
  } else {
    // Without a proxy, keep only the left goal of the closer of the two birds.
    if (leftgoals[row + a[2]] && leftgoals[row + a[1]]) {
      if (length(pos[a[1]] - pos[a[0]]) < length(pos[a[2]] - pos[a[0]])) {
        leftgoals[row + a[2]] = false;
      } else {
        leftgoals[row + a[1]] = false;
      }
    }
  }
}
 | 
			
		||||
 | 
			
		||||
// Initialises goal/neighbour flags from pairs of birds, one thread per pair
// (1D launch; threads beyond numBirds*(numBirds-1)/2 exit immediately).
//
// For each pair the bird with the larger y is `front`, the other is `back`.
// The back bird gets both a left and a right goal on the front bird. If the
// back bird also lies inside a rectangle centred half an upwashY behind the
// front bird, the pair is flagged as neighbours and the back bird is marked
// as having a proxy. Flag matrices are indexed as [back * numBirds + front].
__global__ void cuda_kernel_checkpairs(float2 *pos, uint numBirds,
                                       bool *hasproxy, bool *neighbors,
                                       bool *rightgoals, bool *leftgoals,
                                       uint2 *pairs, Params *params) {
  const uint pairIdx = blockIdx.x * blockDim.x + threadIdx.x;

  if (pairIdx >= numBirds * (numBirds - 1) / 2) {
    return;
  }

  const uint2 pr = pairs[pairIdx];
  const bool secondLeads = pos[pr.y].y > pos[pr.x].y;
  const uint front = secondLeads ? pr.y : pr.x;
  const uint back = secondLeads ? pr.x : pr.y;
  const uint cell = back * numBirds + front;

  leftgoals[cell] = true;
  rightgoals[cell] = true;

  // Centre of the detection rectangle: half an upwashY behind the front bird.
  float2 stepback;
  stepback.x = pos[front].x;
  stepback.y = pos[front].y - 0.5f * params->upwashY;

  const float quadWidth =
      2.f * (params->wingspan + params->lambda + params->upwashX);

  if (!isInsideQuad_D(pos[back], stepback, quadWidth, params->upwashY)) {
    return;
  }

  neighbors[cell] = true;

  // Several threads may mark the same bird; they all store the same value.
  if (!hasproxy[back]) {
    hasproxy[back] = true;
  }
}
 | 
			
		||||
 | 
			
		||||
// Runs one simulation step on the GPU.
//
// Clears the per-step flag buffers, then launches, in order,
// cuda_kernel_checkpairs (one thread per bird pair),
// cuda_kernel_checktriples (one thread per bird triple) and
// cuda_kernel_update (one thread per bird), all on the default stream.
// The step is timed with CUDA events and the average time per step is
// printed every 100 steps.
//
// All pointers are device pointers. d_hasproxy holds numBirds flags;
// d_neighbors, d_leftgoals and d_rightgoals hold numBirds * numBirds flags.
// Errors are reported on stdout; the function itself returns nothing.
extern "C" void cuda_simulate(float2 *newPos, float2 *curPos, uint numBirds,
                              bool *d_hasproxy, bool *d_neighbors,
                              bool *d_leftgoals, bool *d_rightgoals,
                              uint2 *d_pairs, uint3 *d_triples,
                              Params *d_params) {
  cudaError_t error = cudaSuccess;
  float tempms = 0.f;
  static float ms = 0.f;
  static uint step = 0;
  const uint smallblockSize = 32, midblockSize = 128, bigblockSize = 32;

  cudaEvent_t e_start, e_stop;
  cudaEventCreate(&e_start);
  cudaEventCreate(&e_stop);
  cudaEventRecord(e_start, 0);

  // Widen before multiplying so the byte count cannot wrap in 32 bits.
  const size_t matrixBytes = (size_t)numBirds * numBirds * sizeof(bool);

  cudaMemset(d_leftgoals, 0, matrixBytes);
  cudaMemset(d_rightgoals, 0, matrixBytes);
  cudaMemset(d_hasproxy, 0, numBirds * sizeof(bool));
  cudaMemset(d_neighbors, 0, matrixBytes);

  // Work-item counts use the same unsigned arithmetic as the kernels' own
  // bounds checks so host and device always agree.
  const uint numPairs = numBirds * (numBirds - 1) / 2;
  const uint numTriples = numBirds * (numBirds - 1) * (numBirds - 2) / 6;

  // A grid of zero blocks is an invalid launch configuration, so kernels
  // with no work are skipped instead of being launched.
  if (numPairs > 0) {
    dim3 Db = dim3(bigblockSize);
    dim3 Dg = dim3((numPairs + bigblockSize - 1) / bigblockSize);
    cuda_kernel_checkpairs<<<Dg, Db>>>(curPos, numBirds, d_hasproxy,
                                       d_neighbors, d_rightgoals, d_leftgoals,
                                       d_pairs, d_params);
  }

  if (numTriples > 0) {
    // The grid must be derived from the block size actually used.
    dim3 Db = dim3(midblockSize);
    dim3 Dg = dim3((numTriples + midblockSize - 1) / midblockSize);
    cuda_kernel_checktriples<<<Dg, Db>>>(curPos, numBirds, d_hasproxy,
                                         d_neighbors, d_rightgoals,
                                         d_leftgoals, d_triples, d_params);
  }

  if (numBirds > 0) {
    dim3 Db = dim3(smallblockSize);
    dim3 Dg = dim3((numBirds + smallblockSize - 1) / smallblockSize);
    cuda_kernel_update<<<Dg, Db>>>(newPos, curPos, numBirds, d_hasproxy,
                                   d_neighbors, d_rightgoals, d_leftgoals,
                                   d_params);
  }

  cudaDeviceSynchronize();

  cudaEventRecord(e_stop, 0);
  cudaEventSynchronize(e_stop);
  cudaEventElapsedTime(&tempms, e_start, e_stop);
  ms += tempms;

  // Events are created on every call, so they must be released on every
  // call as well.
  cudaEventDestroy(e_start);
  cudaEventDestroy(e_stop);

  if (!(step % 100) && step) {
    printf("GPU, step %u \ntime per step %6.3f ms \n", step, ms / 100.f);
    ms = 0.f;
  }

  step++;

  // Reports (and clears) the most recent error from any call above.
  error = cudaGetLastError();

  if (error != cudaSuccess) {
    printf("one of the cuda kernels failed to launch, error = %d\n", error);
  }
}
 | 
			
		||||
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							@ -1,46 +0,0 @@
 | 
			
		||||
################################################################################
 | 
			
		||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
 | 
			
		||||
#
 | 
			
		||||
# Redistribution and use in source and binary forms, with or without
 | 
			
		||||
# modification, are permitted provided that the following conditions
 | 
			
		||||
# are met:
 | 
			
		||||
#  * Redistributions of source code must retain the above copyright
 | 
			
		||||
#    notice, this list of conditions and the following disclaimer.
 | 
			
		||||
#  * Redistributions in binary form must reproduce the above copyright
 | 
			
		||||
#    notice, this list of conditions and the following disclaimer in the
 | 
			
		||||
#    documentation and/or other materials provided with the distribution.
 | 
			
		||||
#  * Neither the name of NVIDIA CORPORATION nor the names of its
 | 
			
		||||
#    contributors may be used to endorse or promote products derived
 | 
			
		||||
#    from this software without specific prior written permission.
 | 
			
		||||
#
 | 
			
		||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 | 
			
		||||
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 | 
			
		||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 | 
			
		||||
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 | 
			
		||||
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 | 
			
		||||
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 | 
			
		||||
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 | 
			
		||||
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 | 
			
		||||
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 | 
			
		||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 | 
			
		||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
			
		||||
#
 | 
			
		||||
################################################################################
 | 
			
		||||
#
 | 
			
		||||
# Makefile project only supported on Mac OS X and Linux platforms
 | 
			
		||||
#
 | 
			
		||||
################################################################################
 | 
			
		||||
 | 
			
		||||
# Target rules
 | 
			
		||||
all: build
 | 
			
		||||
 | 
			
		||||
build:
 | 
			
		||||
	$(info >>> WARNING - simpleD3D10 is not supported on Linux - waiving sample <<<)
 | 
			
		||||
 | 
			
		||||
run: build
 | 
			
		||||
 | 
			
		||||
testrun: build
 | 
			
		||||
 | 
			
		||||
clean:
 | 
			
		||||
 | 
			
		||||
clobber: clean
 | 
			
		||||
@ -1,49 +0,0 @@
 | 
			
		||||
# simpleD3D10 - Simple Direct3D10 (Vertex Array)
 | 
			
		||||
 | 
			
		||||
## Description
 | 
			
		||||
 | 
			
		||||
Simple program which demonstrates interoperability between CUDA and Direct3D10. The program generates a vertex array with CUDA and uses Direct3D10 to render the geometry.  A Direct3D Capable device is required.
 | 
			
		||||
 | 
			
		||||
## Key Concepts
 | 
			
		||||
 | 
			
		||||
Graphics Interop, 3D Graphics
 | 
			
		||||
 | 
			
		||||
## Supported SM Architectures
 | 
			
		||||
 | 
			
		||||
[SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.9 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 | 
			
		||||
 | 
			
		||||
## Supported OSes
 | 
			
		||||
 | 
			
		||||
Windows
 | 
			
		||||
 | 
			
		||||
## Supported CPU Architecture
 | 
			
		||||
 | 
			
		||||
x86_64
 | 
			
		||||
 | 
			
		||||
## CUDA APIs involved
 | 
			
		||||
 | 
			
		||||
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
 | 
			
		||||
cudaGraphicsUnmapResources, cudaGetErrorString, cudaGraphicsResourceGetMappedPointer, cudaGetLastError, cudaGraphicsMapResources, cudaGetDeviceCount, cudaGraphicsUnregisterResource, cudaGetDeviceProperties
 | 
			
		||||
 | 
			
		||||
## Dependencies needed to build/run
 | 
			
		||||
[DirectX](../../../README.md#directx)
 | 
			
		||||
 | 
			
		||||
## Prerequisites
 | 
			
		||||
 | 
			
		||||
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 | 
			
		||||
Make sure the dependencies mentioned in the [Dependencies](#dependencies-needed-to-buildrun) section above are installed.
 | 
			
		||||
 | 
			
		||||
## Build and Run
 | 
			
		||||
 | 
			
		||||
### Windows
 | 
			
		||||
The Windows samples are built using the Visual Studio IDE. Solution files (.sln) are provided for each supported version of Visual Studio, using the format:
 | 
			
		||||
```
 | 
			
		||||
*_vs<version>.sln - for Visual Studio <version>
 | 
			
		||||
```
 | 
			
		||||
Each individual sample has its own set of solution files in its directory:
 | 
			
		||||
 | 
			
		||||
To build/examine all the samples at once, the complete solution files should be used. To build/examine a single sample, the individual sample solution files should be used.
 | 
			
		||||
> **Note:** Some samples require that the Microsoft DirectX SDK (June 2010 or newer) be installed and that the VC++ directory paths are properly set up (**Tools > Options...**). Check the DirectX Dependencies section for details.
 | 
			
		||||
 | 
			
		||||
## References (for more details)
 | 
			
		||||
 | 
			
		||||
										
											Binary file not shown.
										
									
								
							@ -1,650 +0,0 @@
 | 
			
		||||
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
 | 
			
		||||
 *
 | 
			
		||||
 * Redistribution and use in source and binary forms, with or without
 | 
			
		||||
 * modification, are permitted provided that the following conditions
 | 
			
		||||
 * are met:
 | 
			
		||||
 *  * Redistributions of source code must retain the above copyright
 | 
			
		||||
 *    notice, this list of conditions and the following disclaimer.
 | 
			
		||||
 *  * Redistributions in binary form must reproduce the above copyright
 | 
			
		||||
 *    notice, this list of conditions and the following disclaimer in the
 | 
			
		||||
 *    documentation and/or other materials provided with the distribution.
 | 
			
		||||
 *  * Neither the name of NVIDIA CORPORATION nor the names of its
 | 
			
		||||
 *    contributors may be used to endorse or promote products derived
 | 
			
		||||
 *    from this software without specific prior written permission.
 | 
			
		||||
 *
 | 
			
		||||
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 | 
			
		||||
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 | 
			
		||||
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 | 
			
		||||
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 | 
			
		||||
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 | 
			
		||||
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 | 
			
		||||
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 | 
			
		||||
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 | 
			
		||||
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 | 
			
		||||
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 | 
			
		||||
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
/* This example demonstrates how to use the CUDA Direct3D bindings to fill
 | 
			
		||||
 * a vertex buffer with CUDA and use Direct3D to render the data.
 | 
			
		||||
 * Host code.
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
#pragma warning(disable : 4312)
 | 
			
		||||
 | 
			
		||||
// Windows System include files
 | 
			
		||||
#include <windows.h>
 | 
			
		||||
#include <mmsystem.h>
 | 
			
		||||
 | 
			
		||||
// This header includes all the necessary D3D10 and CUDA includes
 | 
			
		||||
#include <dynlink_d3d10.h>
 | 
			
		||||
#include <cuda_runtime_api.h>
 | 
			
		||||
#include <cuda_d3d10_interop.h>
 | 
			
		||||
 | 
			
		||||
// includes, project
 | 
			
		||||
#include <rendercheck_d3d10.h>
 | 
			
		||||
#include <helper_functions.h>  // Helper functions for other non-cuda utilities
 | 
			
		||||
#include <helper_cuda.h>       // CUDA Helper Functions for initialization
 | 
			
		||||
 | 
			
		||||
#define MAX_EPSILON 10
 | 
			
		||||
 | 
			
		||||
static char *sSDKSample = "simpleD3D10";
 | 
			
		||||
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
// Global variables
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
IDXGIAdapter *g_pCudaCapableAdapter = NULL;  // Adapter to use
 | 
			
		||||
ID3D10Device *g_pd3dDevice = NULL;           // Our rendering device
 | 
			
		||||
IDXGISwapChain *g_pSwapChain = NULL;         // The swap chain of the window
 | 
			
		||||
ID3D10RenderTargetView *g_pSwapChainRTV =
 | 
			
		||||
    NULL;  // The Render target view on the swap chain ( used for clear)
 | 
			
		||||
 | 
			
		||||
ID3D10InputLayout *g_pInputLayout = NULL;
 | 
			
		||||
ID3D10Effect *g_pSimpleEffect = NULL;
 | 
			
		||||
ID3D10EffectTechnique *g_pSimpleTechnique = NULL;
 | 
			
		||||
ID3D10EffectMatrixVariable *g_pmWorld = NULL;
 | 
			
		||||
ID3D10EffectMatrixVariable *g_pmView = NULL;
 | 
			
		||||
ID3D10EffectMatrixVariable *g_pmProjection = NULL;
 | 
			
		||||
 | 
			
		||||
static const char g_simpleEffectSrc[] =
 | 
			
		||||
    "matrix g_mWorld;\n"
 | 
			
		||||
    "matrix g_mView;\n"
 | 
			
		||||
    "matrix g_mProjection;\n"
 | 
			
		||||
    "\n"
 | 
			
		||||
    "struct Fragment{ \n"
 | 
			
		||||
    "    float4 Pos : SV_POSITION;\n"
 | 
			
		||||
    "    float4 Col : TEXCOORD0; };\n"
 | 
			
		||||
    "\n"
 | 
			
		||||
    "Fragment VS( float4 Pos : POSITION, float4 Col : COLOR )\n"
 | 
			
		||||
    "{\n"
 | 
			
		||||
    "    Fragment f;\n"
 | 
			
		||||
    "    f.Pos = mul(Pos, g_mWorld);\n"
 | 
			
		||||
    "    f.Pos = mul(f.Pos, g_mView);\n"
 | 
			
		||||
    "    f.Pos = mul(f.Pos, g_mProjection);\n"
 | 
			
		||||
    "    f.Col = Col;\n"
 | 
			
		||||
    "    return f;\n"
 | 
			
		||||
    "}\n"
 | 
			
		||||
    "\n"
 | 
			
		||||
    "float4 PS( Fragment f ) : SV_Target\n"
 | 
			
		||||
    "{\n"
 | 
			
		||||
    "    return f.Col;\n"
 | 
			
		||||
    "}\n"
 | 
			
		||||
    "\n"
 | 
			
		||||
    "technique10 Render\n"
 | 
			
		||||
    "{\n"
 | 
			
		||||
    "    pass P0\n"
 | 
			
		||||
    "    {\n"
 | 
			
		||||
    "        SetVertexShader( CompileShader( vs_4_0, VS() ) );\n"
 | 
			
		||||
    "        SetGeometryShader( NULL );\n"
 | 
			
		||||
    "        SetPixelShader( CompileShader( ps_4_0, PS() ) );\n"
 | 
			
		||||
    "    }\n"
 | 
			
		||||
    "}\n"
 | 
			
		||||
    "\n";
 | 
			
		||||
 | 
			
		||||
ID3D10Buffer *g_pVB = NULL;  // Buffer to hold vertices
 | 
			
		||||
 | 
			
		||||
struct cudaGraphicsResource *cuda_VB_resource;  // handles D3D10-CUDA exchange
 | 
			
		||||
 | 
			
		||||
// testing/tracing function used pervasively in tests.  if the condition is
 | 
			
		||||
// unsatisfied then spew and fail the function immediately (doing no cleanup)
 | 
			
		||||
#define AssertOrQuit(x)                                                  \
 | 
			
		||||
  if (!(x)) {                                                            \
 | 
			
		||||
    fprintf(stdout, "Assert unsatisfied in %s at %s:%d\n", __FUNCTION__, \
 | 
			
		||||
            __FILE__, __LINE__);                                         \
 | 
			
		||||
    return 1;                                                            \
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
// A structure for our custom vertex type
 | 
			
		||||
struct CUSTOMVERTEX {
 | 
			
		||||
  FLOAT x, y, z;  // The untransformed, 3D position for the vertex
 | 
			
		||||
  DWORD color;    // The vertex color
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
const unsigned int g_WindowWidth = 1024;
 | 
			
		||||
const unsigned int g_WindowHeight = 1024;
 | 
			
		||||
 | 
			
		||||
const unsigned int g_MeshWidth = 512;
 | 
			
		||||
const unsigned int g_MeshHeight = 512;
 | 
			
		||||
 | 
			
		||||
const unsigned int g_NumVertices = g_MeshWidth * g_MeshHeight;
 | 
			
		||||
 | 
			
		||||
bool g_bPassed = true;
 | 
			
		||||
int g_iFrameToCompare = 10;
 | 
			
		||||
 | 
			
		||||
int *pArgc = NULL;
 | 
			
		||||
char **pArgv = NULL;
 | 
			
		||||
 | 
			
		||||
float anim;
 | 
			
		||||
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
// Forward declarations
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
void runTest(int argc, char **argv, char *ref_file);
 | 
			
		||||
void runCuda();
 | 
			
		||||
bool SaveResult(int argc, char **argv);
 | 
			
		||||
HRESULT InitD3D(HWND hWnd);
 | 
			
		||||
HRESULT InitGeometry();
 | 
			
		||||
VOID Cleanup();
 | 
			
		||||
VOID SetupMatrices();
 | 
			
		||||
VOID Render();
 | 
			
		||||
LRESULT WINAPI MsgProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam);
 | 
			
		||||
 | 
			
		||||
// CUDA/D3D10 kernel
 | 
			
		||||
extern "C" void simpleD3DKernel(float4 *pos, unsigned int width,
 | 
			
		||||
                                unsigned int height, float time);
 | 
			
		||||
 | 
			
		||||
#define NAME_LEN 512
 | 
			
		||||
 | 
			
		||||
bool findCUDADevice() {
 | 
			
		||||
  int nGraphicsGPU = 0;
 | 
			
		||||
  int deviceCount = 0;
 | 
			
		||||
  bool bFoundGraphics = false;
 | 
			
		||||
  char devname[NAME_LEN];
 | 
			
		||||
 | 
			
		||||
  // This function call returns 0 if there are no CUDA capable devices.
 | 
			
		||||
  cudaError_t error_id = cudaGetDeviceCount(&deviceCount);
 | 
			
		||||
 | 
			
		||||
  if (error_id != cudaSuccess) {
 | 
			
		||||
    printf("cudaGetDeviceCount returned %d\n-> %s\n", (int)error_id,
 | 
			
		||||
           cudaGetErrorString(error_id));
 | 
			
		||||
    exit(EXIT_FAILURE);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (deviceCount == 0) {
 | 
			
		||||
    printf("> There are no device(s) supporting CUDA\n");
 | 
			
		||||
    return false;
 | 
			
		||||
  } else {
 | 
			
		||||
    printf("> Found %d CUDA Capable Device(s)\n", deviceCount);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Get CUDA device properties
 | 
			
		||||
  cudaDeviceProp deviceProp;
 | 
			
		||||
 | 
			
		||||
  for (int dev = 0; dev < deviceCount; ++dev) {
 | 
			
		||||
    cudaGetDeviceProperties(&deviceProp, dev);
 | 
			
		||||
    STRCPY(devname, NAME_LEN, deviceProp.name);
 | 
			
		||||
    printf("> GPU %d: %s\n", dev, devname);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  return true;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool findDXDevice(char *dev_name) {
 | 
			
		||||
  HRESULT hr = S_OK;
 | 
			
		||||
  cudaError cuStatus;
 | 
			
		||||
 | 
			
		||||
  // Iterate through the candidate adapters
 | 
			
		||||
  IDXGIFactory *pFactory;
 | 
			
		||||
  hr = sFnPtr_CreateDXGIFactory(__uuidof(IDXGIFactory), (void **)(&pFactory));
 | 
			
		||||
 | 
			
		||||
  if (!SUCCEEDED(hr)) {
 | 
			
		||||
    printf("> No DXGI Factory created.\n");
 | 
			
		||||
    return false;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  UINT adapter = 0;
 | 
			
		||||
 | 
			
		||||
  for (; !g_pCudaCapableAdapter; ++adapter) {
 | 
			
		||||
    // Get a candidate DXGI adapter
 | 
			
		||||
    IDXGIAdapter *pAdapter = NULL;
 | 
			
		||||
    hr = pFactory->EnumAdapters(adapter, &pAdapter);
 | 
			
		||||
 | 
			
		||||
    if (FAILED(hr)) {
 | 
			
		||||
      break;  // no compatible adapters found
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // Query to see if there exists a corresponding compute device
 | 
			
		||||
    int cuDevice;
 | 
			
		||||
    cuStatus = cudaD3D10GetDevice(&cuDevice, pAdapter);
 | 
			
		||||
    // This prints and resets  the cudaError to cudaSuccess
 | 
			
		||||
    printLastCudaError("cudaD3D10GetDevice failed");
 | 
			
		||||
 | 
			
		||||
    if (cudaSuccess == cuStatus) {
 | 
			
		||||
      // If so, mark it as the one against which to create our d3d10 device
 | 
			
		||||
      g_pCudaCapableAdapter = pAdapter;
 | 
			
		||||
      g_pCudaCapableAdapter->AddRef();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    pAdapter->Release();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  printf("> Found %d D3D10 Adapater(s).\n", (int)adapter);
 | 
			
		||||
 | 
			
		||||
  pFactory->Release();
 | 
			
		||||
 | 
			
		||||
  if (!g_pCudaCapableAdapter) {
 | 
			
		||||
    printf("> Found 0 D3D10 Adapater(s) /w Compute capability.\n");
 | 
			
		||||
    return false;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  DXGI_ADAPTER_DESC adapterDesc;
 | 
			
		||||
  g_pCudaCapableAdapter->GetDesc(&adapterDesc);
 | 
			
		||||
  wcstombs(dev_name, adapterDesc.Description, 128);
 | 
			
		||||
 | 
			
		||||
  printf("> Found 1 D3D10 Adapater(s) /w Compute capability.\n");
 | 
			
		||||
  printf("> %s\n", dev_name);
 | 
			
		||||
 | 
			
		||||
  return true;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Program main
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
int main(int argc, char **argv) {
 | 
			
		||||
  char device_name[256];
 | 
			
		||||
  char *ref_file = NULL;
 | 
			
		||||
 | 
			
		||||
  pArgc = &argc;
 | 
			
		||||
  pArgv = argv;
 | 
			
		||||
 | 
			
		||||
  printf("> %s starting...\n", sSDKSample);
 | 
			
		||||
 | 
			
		||||
  if (!findCUDADevice()) {  // Search for CUDA GPU
 | 
			
		||||
    printf("> CUDA Device NOT found on \"%s\".. Exiting.\n", device_name);
 | 
			
		||||
    exit(EXIT_SUCCESS);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Search for D3D API (locate drivers, does not mean device is found)
 | 
			
		||||
  if (!dynlinkLoadD3D10API()) {
 | 
			
		||||
    printf("> D3D10 API libraries NOT found on.. Exiting.\n");
 | 
			
		||||
    dynlinkUnloadD3D10API();
 | 
			
		||||
    exit(EXIT_SUCCESS);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (!findDXDevice(device_name)) {  // Search for D3D Hardware Device
 | 
			
		||||
    printf("> D3D10 Graphics Device NOT found.. Exiting.\n");
 | 
			
		||||
    dynlinkUnloadD3D10API();
 | 
			
		||||
    exit(EXIT_SUCCESS);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (argc > 1) {
 | 
			
		||||
    if (checkCmdLineFlag(argc, (const char **)argv, "file")) {
 | 
			
		||||
      getCmdLineArgumentString(argc, (const char **)argv, "file",
 | 
			
		||||
                               (char **)&ref_file);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  runTest(argc, argv, ref_file);
 | 
			
		||||
 | 
			
		||||
  //
 | 
			
		||||
  // and exit
 | 
			
		||||
  //
 | 
			
		||||
  printf("%s running on %s exiting...\n", sSDKSample, device_name);
 | 
			
		||||
  printf("%s sample finished returned: %s\n", sSDKSample,
 | 
			
		||||
         (g_bPassed ? "OK" : "ERROR!"));
 | 
			
		||||
  exit(g_bPassed ? EXIT_SUCCESS : EXIT_FAILURE);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
//! Run a simple test for CUDA
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
void runTest(int argc, char **argv, char *ref_file) {
 | 
			
		||||
  // Register the window class
 | 
			
		||||
  WNDCLASSEX wc = {sizeof(WNDCLASSEX),       CS_CLASSDC, MsgProc, 0L,   0L,
 | 
			
		||||
                   GetModuleHandle(NULL),    NULL,       NULL,    NULL, NULL,
 | 
			
		||||
                   "CUDA/D3D10 simpleD3D10", NULL};
 | 
			
		||||
  RegisterClassEx(&wc);
 | 
			
		||||
 | 
			
		||||
  // Create the application's window
 | 
			
		||||
  int xBorder = ::GetSystemMetrics(SM_CXSIZEFRAME);
 | 
			
		||||
  int yMenu = ::GetSystemMetrics(SM_CYMENU);
 | 
			
		||||
  int yBorder = ::GetSystemMetrics(SM_CYSIZEFRAME);
 | 
			
		||||
  HWND hWnd = CreateWindow(
 | 
			
		||||
      wc.lpszClassName, "CUDA/D3D10 simpleD3D10", WS_OVERLAPPEDWINDOW, 0, 0,
 | 
			
		||||
      g_WindowWidth + 2 * xBorder, g_WindowHeight + 2 * yBorder + yMenu, NULL,
 | 
			
		||||
      NULL, wc.hInstance, NULL);
 | 
			
		||||
 | 
			
		||||
  // Initialize Direct3D
 | 
			
		||||
  if (SUCCEEDED(InitD3D(hWnd))) {
 | 
			
		||||
    // Create the scene geometry
 | 
			
		||||
    if (SUCCEEDED(InitGeometry())) {
 | 
			
		||||
      // Initialize interoperability between CUDA and Direct3D
 | 
			
		||||
      // Register vertex buffer with CUDA
 | 
			
		||||
      // DEPRECATED: cudaD3D10RegisterResource(g_pVB,
 | 
			
		||||
      // cudaD3D10RegisterFlagsNone);
 | 
			
		||||
      cudaGraphicsD3D10RegisterResource(&cuda_VB_resource, g_pVB,
 | 
			
		||||
                                        cudaD3D10RegisterFlagsNone);
 | 
			
		||||
      getLastCudaError("cudaGraphicsD3D10RegisterResource (g_pVB) failed");
 | 
			
		||||
 | 
			
		||||
      // Initialize vertex buffer with CUDA
 | 
			
		||||
      runCuda();
 | 
			
		||||
 | 
			
		||||
      // Save result
 | 
			
		||||
      SaveResult(argc, argv);
 | 
			
		||||
 | 
			
		||||
      // Show the window
 | 
			
		||||
      ShowWindow(hWnd, SW_SHOWDEFAULT);
 | 
			
		||||
      UpdateWindow(hWnd);
 | 
			
		||||
 | 
			
		||||
      // Enter the message loop
 | 
			
		||||
      MSG msg;
 | 
			
		||||
      ZeroMemory(&msg, sizeof(msg));
 | 
			
		||||
 | 
			
		||||
      while (msg.message != WM_QUIT) {
 | 
			
		||||
        if (PeekMessage(&msg, NULL, 0U, 0U, PM_REMOVE)) {
 | 
			
		||||
          TranslateMessage(&msg);
 | 
			
		||||
          DispatchMessage(&msg);
 | 
			
		||||
        } else {
 | 
			
		||||
          Render();
 | 
			
		||||
 | 
			
		||||
          if (ref_file != NULL) {
 | 
			
		||||
            for (int count = 0; count < g_iFrameToCompare; count++) {
 | 
			
		||||
              Render();
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            const char *cur_image_path = "simpleD3D10.ppm";
 | 
			
		||||
 | 
			
		||||
            // Save a reference of our current test run image
 | 
			
		||||
            CheckRenderD3D10::ActiveRenderTargetToPPM(g_pd3dDevice,
 | 
			
		||||
                                                      cur_image_path);
 | 
			
		||||
 | 
			
		||||
            // compare to offical reference image, printing PASS or FAIL.
 | 
			
		||||
            g_bPassed = CheckRenderD3D10::PPMvsPPM(cur_image_path, ref_file,
 | 
			
		||||
                                                   argv[0], MAX_EPSILON, 0.15f);
 | 
			
		||||
 | 
			
		||||
            Cleanup();
 | 
			
		||||
 | 
			
		||||
            PostQuitMessage(0);
 | 
			
		||||
          }
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Release D3D Library (after message loop)
 | 
			
		||||
  dynlinkUnloadD3D10API();
 | 
			
		||||
 | 
			
		||||
  UnregisterClass(wc.lpszClassName, wc.hInstance);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
//! Run the Cuda part of the computation
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
void runCuda() {
 | 
			
		||||
  // Map vertex buffer to Cuda
 | 
			
		||||
  float4 *d_ptr;
 | 
			
		||||
 | 
			
		||||
  // CUDA Map call to the Vertex Buffer and return a pointer
 | 
			
		||||
  // DEPRECATED: cudaD3D10MapResources(1, (ID3D10Resource **)&g_pVB);
 | 
			
		||||
  checkCudaErrors(cudaGraphicsMapResources(1, &cuda_VB_resource, 0));
 | 
			
		||||
  getLastCudaError("cudaGraphicsMapResources failed");
 | 
			
		||||
 | 
			
		||||
  // DEPRECATED: cudaD3D10ResourceGetMappedPointer( (void **)&dptr, g_pVB, 0);
 | 
			
		||||
  size_t num_bytes;
 | 
			
		||||
  checkCudaErrors(cudaGraphicsResourceGetMappedPointer(
 | 
			
		||||
      (void **)&d_ptr, &num_bytes, cuda_VB_resource));
 | 
			
		||||
  getLastCudaError("cudaGraphicsResourceGetMappedPointer failed");
 | 
			
		||||
 | 
			
		||||
  // Execute kernel
 | 
			
		||||
  simpleD3DKernel(d_ptr, g_MeshWidth, g_MeshHeight, anim);
 | 
			
		||||
 | 
			
		||||
  // CUDA Map Unmap vertex buffer
 | 
			
		||||
  // DEPRECATED: cudaD3D10UnmapResources(1, (ID3D10Resource **)&g_pVB);
 | 
			
		||||
  checkCudaErrors(cudaGraphicsUnmapResources(1, &cuda_VB_resource, 0));
 | 
			
		||||
  getLastCudaError("cudaGraphicsUnmapResource failed");
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
//! Check if the result is correct or write data to file for external
 | 
			
		||||
//! regression testing
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
bool SaveResult(int argc, char **argv) {
 | 
			
		||||
  // Map vertex buffer
 | 
			
		||||
  float *data;
 | 
			
		||||
 | 
			
		||||
  if (FAILED(g_pVB->Map(D3D10_MAP_READ, 0,
 | 
			
		||||
                        (void **)&data)))  // Lock(0, 0, (void**)&data, 0)))
 | 
			
		||||
    return false;
 | 
			
		||||
 | 
			
		||||
  // Unmap
 | 
			
		||||
  g_pVB->Unmap();
 | 
			
		||||
 | 
			
		||||
  // Save result
 | 
			
		||||
  if (checkCmdLineFlag(argc, (const char **)argv, "regression")) {
 | 
			
		||||
    // write file for regression test
 | 
			
		||||
    sdkWriteFile<float>("./data/regression.dat", data, sizeof(CUSTOMVERTEX),
 | 
			
		||||
                        0.0f, false);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  return true;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
// Name: InitD3D()
 | 
			
		||||
// Desc: Initializes Direct3D
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
HRESULT InitD3D(HWND hWnd) {
 | 
			
		||||
  // Set up the structure used to create the device and swapchain
 | 
			
		||||
  DXGI_SWAP_CHAIN_DESC sd;
 | 
			
		||||
  ZeroMemory(&sd, sizeof(sd));
 | 
			
		||||
  sd.BufferCount = 1;
 | 
			
		||||
  sd.BufferDesc.Width = g_WindowWidth;
 | 
			
		||||
  sd.BufferDesc.Height = g_WindowHeight;
 | 
			
		||||
  sd.BufferDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
 | 
			
		||||
  sd.BufferDesc.RefreshRate.Numerator = 60;
 | 
			
		||||
  sd.BufferDesc.RefreshRate.Denominator = 1;
 | 
			
		||||
  sd.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT;
 | 
			
		||||
  sd.OutputWindow = hWnd;
 | 
			
		||||
  sd.SampleDesc.Count = 1;
 | 
			
		||||
  sd.SampleDesc.Quality = 0;
 | 
			
		||||
  sd.Windowed = TRUE;
 | 
			
		||||
 | 
			
		||||
  // Create device and swapchain
 | 
			
		||||
  HRESULT hr = sFnPtr_D3D10CreateDeviceAndSwapChain(
 | 
			
		||||
      g_pCudaCapableAdapter, D3D10_DRIVER_TYPE_HARDWARE, NULL, 0,
 | 
			
		||||
      D3D10_SDK_VERSION, &sd, &g_pSwapChain, &g_pd3dDevice);
 | 
			
		||||
  AssertOrQuit(SUCCEEDED(hr));
 | 
			
		||||
  g_pCudaCapableAdapter->Release();
 | 
			
		||||
 | 
			
		||||
  // Create a render target view of the swapchain
 | 
			
		||||
  ID3D10Texture2D *pBuffer;
 | 
			
		||||
  hr =
 | 
			
		||||
      g_pSwapChain->GetBuffer(0, __uuidof(ID3D10Texture2D), (LPVOID *)&pBuffer);
 | 
			
		||||
  AssertOrQuit(SUCCEEDED(hr));
 | 
			
		||||
 | 
			
		||||
  hr = g_pd3dDevice->CreateRenderTargetView(pBuffer, NULL, &g_pSwapChainRTV);
 | 
			
		||||
  AssertOrQuit(SUCCEEDED(hr));
 | 
			
		||||
  pBuffer->Release();
 | 
			
		||||
 | 
			
		||||
  g_pd3dDevice->OMSetRenderTargets(1, &g_pSwapChainRTV, NULL);
 | 
			
		||||
 | 
			
		||||
  // Setup the viewport
 | 
			
		||||
  D3D10_VIEWPORT vp;
 | 
			
		||||
  vp.Width = g_WindowWidth;
 | 
			
		||||
  vp.Height = g_WindowHeight;
 | 
			
		||||
  vp.MinDepth = 0.0f;
 | 
			
		||||
  vp.MaxDepth = 1.0f;
 | 
			
		||||
  vp.TopLeftX = 0;
 | 
			
		||||
  vp.TopLeftY = 0;
 | 
			
		||||
  g_pd3dDevice->RSSetViewports(1, &vp);
 | 
			
		||||
 | 
			
		||||
  // Setup the effect
 | 
			
		||||
  {
 | 
			
		||||
    ID3D10Blob *pCompiledEffect;
 | 
			
		||||
    hr = sFnPtr_D3D10CompileEffectFromMemory((void *)g_simpleEffectSrc,
 | 
			
		||||
                                             sizeof(g_simpleEffectSrc), NULL,
 | 
			
		||||
                                             NULL,  // pDefines
 | 
			
		||||
                                             NULL,  // pIncludes
 | 
			
		||||
                                             0,     // HLSL flags
 | 
			
		||||
                                             0,     // FXFlags
 | 
			
		||||
                                             &pCompiledEffect, NULL);
 | 
			
		||||
    AssertOrQuit(SUCCEEDED(hr));
 | 
			
		||||
 | 
			
		||||
    hr = sFnPtr_D3D10CreateEffectFromMemory(
 | 
			
		||||
        pCompiledEffect->GetBufferPointer(), pCompiledEffect->GetBufferSize(),
 | 
			
		||||
        0,  // FXFlags
 | 
			
		||||
        g_pd3dDevice, NULL, &g_pSimpleEffect);
 | 
			
		||||
    pCompiledEffect->Release();
 | 
			
		||||
 | 
			
		||||
    g_pSimpleTechnique = g_pSimpleEffect->GetTechniqueByName("Render");
 | 
			
		||||
 | 
			
		||||
    // g_pmWorldViewProjection =
 | 
			
		||||
    // g_pSimpleEffect->GetVariableByName("g_mWorldViewProjection")->AsMatrix();
 | 
			
		||||
    g_pmWorld = g_pSimpleEffect->GetVariableByName("g_mWorld")->AsMatrix();
 | 
			
		||||
    g_pmView = g_pSimpleEffect->GetVariableByName("g_mView")->AsMatrix();
 | 
			
		||||
    g_pmProjection =
 | 
			
		||||
        g_pSimpleEffect->GetVariableByName("g_mProjection")->AsMatrix();
 | 
			
		||||
 | 
			
		||||
    // Define the input layout
 | 
			
		||||
    D3D10_INPUT_ELEMENT_DESC layout[] = {
 | 
			
		||||
        {"POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 0,
 | 
			
		||||
         D3D10_INPUT_PER_VERTEX_DATA, 0},
 | 
			
		||||
        {"COLOR", 0, DXGI_FORMAT_R8G8B8A8_UNORM, 0, 12,
 | 
			
		||||
         D3D10_INPUT_PER_VERTEX_DATA, 0},
 | 
			
		||||
    };
 | 
			
		||||
    UINT numElements = sizeof(layout) / sizeof(layout[0]);
 | 
			
		||||
 | 
			
		||||
    // Create the input layout
 | 
			
		||||
    D3D10_PASS_DESC PassDesc;
 | 
			
		||||
    g_pSimpleTechnique->GetPassByIndex(0)->GetDesc(&PassDesc);
 | 
			
		||||
    hr = g_pd3dDevice->CreateInputLayout(
 | 
			
		||||
        layout, numElements, PassDesc.pIAInputSignature,
 | 
			
		||||
        PassDesc.IAInputSignatureSize, &g_pInputLayout);
 | 
			
		||||
    AssertOrQuit(SUCCEEDED(hr));
 | 
			
		||||
 | 
			
		||||
    // Setup Input Layout, apply effect and draw points
 | 
			
		||||
    g_pd3dDevice->IASetInputLayout(g_pInputLayout);
 | 
			
		||||
    g_pSimpleTechnique->GetPassByIndex(0)->Apply(0);
 | 
			
		||||
    g_pd3dDevice->IASetPrimitiveTopology(D3D10_PRIMITIVE_TOPOLOGY_POINTLIST);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  return S_OK;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
// Name: InitGeometry()
 | 
			
		||||
// Desc: Creates the scene geometry
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
HRESULT InitGeometry() {
 | 
			
		||||
  // Setup buffer desc
 | 
			
		||||
  D3D10_BUFFER_DESC bufferDesc;
 | 
			
		||||
  bufferDesc.Usage = D3D10_USAGE_DEFAULT;
 | 
			
		||||
  bufferDesc.ByteWidth = sizeof(CUSTOMVERTEX) * g_NumVertices;
 | 
			
		||||
  bufferDesc.BindFlags = D3D10_BIND_VERTEX_BUFFER;
 | 
			
		||||
  bufferDesc.CPUAccessFlags = 0;
 | 
			
		||||
  bufferDesc.MiscFlags = 0;
 | 
			
		||||
 | 
			
		||||
  // Create the buffer, no need for sub resource data struct since everything
 | 
			
		||||
  // will be defined from cuda
 | 
			
		||||
  if (FAILED(g_pd3dDevice->CreateBuffer(&bufferDesc, NULL, &g_pVB)))
 | 
			
		||||
    return E_FAIL;
 | 
			
		||||
 | 
			
		||||
  return S_OK;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
// Name: Cleanup()
 | 
			
		||||
// Desc: Releases all previously initialized objects
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
VOID Cleanup() {
 | 
			
		||||
  if (g_pVB != NULL) {
 | 
			
		||||
    // Unregister vertex buffer
 | 
			
		||||
    // DEPRECATED: checkCudaErrors(cudaD3D10UnregisterResource(g_pVB));
 | 
			
		||||
    cudaGraphicsUnregisterResource(cuda_VB_resource);
 | 
			
		||||
    getLastCudaError("cudaGraphicsUnregisterResource failed");
 | 
			
		||||
 | 
			
		||||
    g_pVB->Release();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (g_pInputLayout != NULL) g_pInputLayout->Release();
 | 
			
		||||
 | 
			
		||||
  if (g_pSimpleEffect != NULL) g_pSimpleEffect->Release();
 | 
			
		||||
 | 
			
		||||
  if (g_pSwapChainRTV != NULL) g_pSwapChainRTV->Release();
 | 
			
		||||
 | 
			
		||||
  if (g_pSwapChain != NULL) g_pSwapChain->Release();
 | 
			
		||||
 | 
			
		||||
  if (g_pd3dDevice != NULL) g_pd3dDevice->Release();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
// Name: SetupMatrices()
 | 
			
		||||
// Desc: Sets up the world, view, and projection transform matrices.
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
VOID SetupMatrices() {
 | 
			
		||||
  XMMATRIX matWorld;
 | 
			
		||||
  matWorld = XMMatrixIdentity();
 | 
			
		||||
 | 
			
		||||
  XMVECTOR vEyePt = {0.0f, 3.0f, -2.0f};
 | 
			
		||||
  XMVECTOR vLookatPt = {0.0f, 0.0f, 0.0f};
 | 
			
		||||
  XMVECTOR vUpVec = {0.0f, 1.0f, 0.0f};
 | 
			
		||||
  XMMATRIX matView;
 | 
			
		||||
  matView = XMMatrixLookAtLH(vEyePt, vLookatPt, vUpVec);
 | 
			
		||||
 | 
			
		||||
  XMMATRIX matProj;
 | 
			
		||||
  matProj = XMMatrixPerspectiveFovLH((float)XM_PI / 4.f, 1.0f, 0.01f, 10.0f);
 | 
			
		||||
 | 
			
		||||
  g_pmWorld->SetMatrix((float *)&matWorld);
 | 
			
		||||
  g_pmView->SetMatrix((float *)&matView);
 | 
			
		||||
  g_pmProjection->SetMatrix((float *)&matProj);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
// Name: Render()
 | 
			
		||||
// Desc: Draws the scene
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
VOID Render() {
 | 
			
		||||
  // Clear the backbuffer to a black color
 | 
			
		||||
  float ClearColor[4] = {0, 0, 0, 0};
 | 
			
		||||
  g_pd3dDevice->ClearRenderTargetView(g_pSwapChainRTV, ClearColor);
 | 
			
		||||
 | 
			
		||||
  // Run CUDA to update vertex positions
 | 
			
		||||
  runCuda();
 | 
			
		||||
 | 
			
		||||
  // Draw frame
 | 
			
		||||
  {
 | 
			
		||||
    // Setup the world, view, and projection matrices
 | 
			
		||||
    SetupMatrices();
 | 
			
		||||
 | 
			
		||||
    // Render the vertex buffer contents
 | 
			
		||||
    UINT stride = sizeof(CUSTOMVERTEX);
 | 
			
		||||
    UINT offset = 0;
 | 
			
		||||
    g_pd3dDevice->IASetVertexBuffers(0, 1, &g_pVB, &stride, &offset);
 | 
			
		||||
 | 
			
		||||
    g_pSimpleTechnique->GetPassByIndex(0)->Apply(0);
 | 
			
		||||
    g_pd3dDevice->Draw(g_NumVertices, 0);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Present the backbuffer contents to the display
 | 
			
		||||
  g_pSwapChain->Present(0, 0);
 | 
			
		||||
 | 
			
		||||
  anim += 0.01f;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
// Name: MsgProc()
 | 
			
		||||
// Desc: The window's message handler
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
LRESULT WINAPI MsgProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam) {
 | 
			
		||||
  switch (msg) {
 | 
			
		||||
    case WM_DESTROY:
 | 
			
		||||
    case WM_KEYDOWN:
 | 
			
		||||
      if (msg != WM_KEYDOWN || wParam == 27) {
 | 
			
		||||
        Cleanup();
 | 
			
		||||
        PostQuitMessage(0);
 | 
			
		||||
        return 0;
 | 
			
		||||
      }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  return DefWindowProc(hWnd, msg, wParam, lParam);
 | 
			
		||||
}
 | 
			
		||||
@ -1,85 +0,0 @@
 | 
			
		||||
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
 | 
			
		||||
 *
 | 
			
		||||
 * Redistribution and use in source and binary forms, with or without
 | 
			
		||||
 * modification, are permitted provided that the following conditions
 | 
			
		||||
 * are met:
 | 
			
		||||
 *  * Redistributions of source code must retain the above copyright
 | 
			
		||||
 *    notice, this list of conditions and the following disclaimer.
 | 
			
		||||
 *  * Redistributions in binary form must reproduce the above copyright
 | 
			
		||||
 *    notice, this list of conditions and the following disclaimer in the
 | 
			
		||||
 *    documentation and/or other materials provided with the distribution.
 | 
			
		||||
 *  * Neither the name of NVIDIA CORPORATION nor the names of its
 | 
			
		||||
 *    contributors may be used to endorse or promote products derived
 | 
			
		||||
 *    from this software without specific prior written permission.
 | 
			
		||||
 *
 | 
			
		||||
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 | 
			
		||||
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 | 
			
		||||
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 | 
			
		||||
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 | 
			
		||||
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 | 
			
		||||
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 | 
			
		||||
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 | 
			
		||||
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 | 
			
		||||
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 | 
			
		||||
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 | 
			
		||||
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
/* This example demonstrates how to use the CUDA Direct3D bindings with the
 | 
			
		||||
 * runtime API.
 | 
			
		||||
 * Device code.
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
#ifndef _SIMPLED3D_KERNEL_CU_
 | 
			
		||||
#define _SIMPLED3D_KERNEL_CU_
 | 
			
		||||
 | 
			
		||||
// includes, C string library
 | 
			
		||||
#include <stdio.h>
 | 
			
		||||
#include <stdlib.h>
 | 
			
		||||
#include <string.h>
 | 
			
		||||
 | 
			
		||||
// includes, cuda
 | 
			
		||||
#include <cuda.h>
 | 
			
		||||
#include <builtin_types.h>
 | 
			
		||||
#include <cuda_runtime_api.h>
 | 
			
		||||
 | 
			
		||||
///////////////////////////////////////////////////////////////////////////////
//! Simple kernel to modify vertex positions in sine wave pattern
//!
//! Expects a 2D launch in which thread (x, y) owns vertex (x, y) of a
//! width x height mesh stored row-major in pos. Threads that fall outside
//! the mesh return without writing, so the grid may be larger than the mesh.
//!
//! @param pos     output vertices in global memory (width * height entries)
//! @param width   mesh width in vertices
//! @param height  mesh height in vertices
//! @param time    animation phase added to the sine/cosine arguments
///////////////////////////////////////////////////////////////////////////////
__global__ void kernel(float4 *pos, unsigned int width, unsigned int height,
                       float time) {
  unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
  unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;

  // Guard the grid tail: the launch rounds the grid up, so some threads can
  // lie outside the mesh and must not write.
  if (x >= width || y >= height) {
    return;
  }

  // calculate uv coordinates in [-1, 1)
  float u = x / (float)width;
  float v = y / (float)height;
  u = u * 2.0f - 1.0f;
  v = v * 2.0f - 1.0f;

  // calculate simple sine wave pattern
  float freq = 4.0f;
  float w = sinf(u * freq + time) * cosf(v * freq + time) * 0.5f;

  // write output vertex; the fourth component carries the bit pattern
  // 0xff00ff00 reinterpreted as a float
  pos[y * width + x] = make_float4(u, w, v, __int_as_float(0xff00ff00));
}

///////////////////////////////////////////////////////////////////////////////
//! Host wrapper that launches kernel() over the whole mesh using 8x8 blocks.
//! The grid is rounded up so meshes whose size is not a multiple of the block
//! size are covered completely. A launch error is reported on stdout.
//!
//! @param pos     device pointer to width * height float4 vertices
//! @param width   mesh width in vertices
//! @param height  mesh height in vertices
//! @param time    animation phase forwarded to the kernel
///////////////////////////////////////////////////////////////////////////////
extern "C" void simpleD3DKernel(float4 *pos, unsigned int width,
                                unsigned int height, float time) {
  // An empty mesh has nothing to compute, and a zero-sized grid would be an
  // invalid launch configuration.
  if (width == 0 || height == 0) {
    return;
  }

  cudaError_t error = cudaSuccess;

  dim3 block(8, 8, 1);
  // Ceiling division so that a partial tile at the right/bottom edge still
  // gets a block
  dim3 grid((width + block.x - 1) / block.x, (height + block.y - 1) / block.y,
            1);

  kernel<<<grid, block>>>(pos, width, height, time);

  error = cudaGetLastError();

  if (error != cudaSuccess) {
    printf("kernel() failed to launch error = %d\n", (int)error);
  }
}
 | 
			
		||||
 | 
			
		||||
#endif  // #ifndef _SIMPLED3D_KERNEL_CU_
 | 
			
		||||
@ -1,46 +0,0 @@
 | 
			
		||||
################################################################################
 | 
			
		||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
 | 
			
		||||
#
 | 
			
		||||
# Redistribution and use in source and binary forms, with or without
 | 
			
		||||
# modification, are permitted provided that the following conditions
 | 
			
		||||
# are met:
 | 
			
		||||
#  * Redistributions of source code must retain the above copyright
 | 
			
		||||
#    notice, this list of conditions and the following disclaimer.
 | 
			
		||||
#  * Redistributions in binary form must reproduce the above copyright
 | 
			
		||||
#    notice, this list of conditions and the following disclaimer in the
 | 
			
		||||
#    documentation and/or other materials provided with the distribution.
 | 
			
		||||
#  * Neither the name of NVIDIA CORPORATION nor the names of its
 | 
			
		||||
#    contributors may be used to endorse or promote products derived
 | 
			
		||||
#    from this software without specific prior written permission.
 | 
			
		||||
#
 | 
			
		||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 | 
			
		||||
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 | 
			
		||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 | 
			
		||||
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 | 
			
		||||
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 | 
			
		||||
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 | 
			
		||||
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 | 
			
		||||
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 | 
			
		||||
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 | 
			
		||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 | 
			
		||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
			
		||||
#
 | 
			
		||||
################################################################################
 | 
			
		||||
#
 | 
			
		||||
# Makefile project is only supported on Mac OS X and Linux platforms
 | 
			
		||||
#
 | 
			
		||||
################################################################################
 | 
			
		||||
 | 
			
		||||
# Target rules
# None of these targets produces a file, so they are declared phony; a stray
# file named e.g. "build" or "clean" must not make them appear up to date.
.PHONY: all build run testrun clean clobber

all: build

# The sample is waived on this platform: building only prints a notice.
build:
	$(info >>> WARNING - simpleD3D10RenderTarget is not supported on Linux - waiving sample <<<)

run: build

testrun: build

clean:

clobber: clean
 | 
			
		||||
@ -1,49 +0,0 @@
 | 
			
		||||
# simpleD3D10RenderTarget - Simple Direct3D10 Render Target
 | 
			
		||||
 | 
			
		||||
## Description
 | 
			
		||||
 | 
			
		||||
Simple program which demonstrates interop of rendertargets between Direct3D10 and CUDA. The program uses RenderTarget positions with CUDA and generates a histogram with visualization.  A Direct3D10 Capable device is required.
 | 
			
		||||
 | 
			
		||||
## Key Concepts
 | 
			
		||||
 | 
			
		||||
Graphics Interop, Texture
 | 
			
		||||
 | 
			
		||||
## Supported SM Architectures
 | 
			
		||||
 | 
			
		||||
[SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.9 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 | 
			
		||||
 | 
			
		||||
## Supported OSes
 | 
			
		||||
 | 
			
		||||
Windows
 | 
			
		||||
 | 
			
		||||
## Supported CPU Architecture
 | 
			
		||||
 | 
			
		||||
x86_64
 | 
			
		||||
 | 
			
		||||
## CUDA APIs involved
 | 
			
		||||
 | 
			
		||||
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
 | 
			
		||||
cudaGraphicsUnmapResources, cudaMemcpy, cudaMalloc, cudaUnbindTexture, cudaGetErrorString, cudaGraphicsResourceGetMappedPointer, cudaGetLastError, cudaGraphicsMapResources, cudaGetDeviceCount, cudaGraphicsUnregisterResource, cudaGraphicsSubResourceGetMappedArray, cudaBindTextureToArray, cudaGetDeviceProperties
 | 
			
		||||
 | 
			
		||||
## Dependencies needed to build/run
 | 
			
		||||
[DirectX](../../../README.md#directx)
 | 
			
		||||
 | 
			
		||||
## Prerequisites
 | 
			
		||||
 | 
			
		||||
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 | 
			
		||||
Make sure the dependencies mentioned in the [Dependencies](#dependencies-needed-to-buildrun) section above are installed.
 | 
			
		||||
 | 
			
		||||
## Build and Run
 | 
			
		||||
 | 
			
		||||
### Windows
 | 
			
		||||
The Windows samples are built using the Visual Studio IDE. Solution files (.sln) are provided for each supported version of Visual Studio, using the format:
 | 
			
		||||
```
 | 
			
		||||
*_vs<version>.sln - for Visual Studio <version>
 | 
			
		||||
```
 | 
			
		||||
Each individual sample has its own set of solution files in its directory.
 | 
			
		||||
 | 
			
		||||
To build/examine all the samples at once, the complete solution files should be used. To build/examine a single sample, the individual sample solution files should be used.
 | 
			
		||||
> **Note:** Some samples require that the Microsoft DirectX SDK (June 2010 or newer) be installed and that the VC++ directory paths are properly set up (**Tools > Options...**). Check the DirectX Dependencies section for details.
 | 
			
		||||
 | 
			
		||||
## References (for more details)
 | 
			
		||||
 | 
			
		||||
										
											Binary file not shown.
										
									
								
							@ -1,850 +0,0 @@
 | 
			
		||||
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
 | 
			
		||||
 *
 | 
			
		||||
 * Redistribution and use in source and binary forms, with or without
 | 
			
		||||
 * modification, are permitted provided that the following conditions
 | 
			
		||||
 * are met:
 | 
			
		||||
 *  * Redistributions of source code must retain the above copyright
 | 
			
		||||
 *    notice, this list of conditions and the following disclaimer.
 | 
			
		||||
 *  * Redistributions in binary form must reproduce the above copyright
 | 
			
		||||
 *    notice, this list of conditions and the following disclaimer in the
 | 
			
		||||
 *    documentation and/or other materials provided with the distribution.
 | 
			
		||||
 *  * Neither the name of NVIDIA CORPORATION nor the names of its
 | 
			
		||||
 *    contributors may be used to endorse or promote products derived
 | 
			
		||||
 *    from this software without specific prior written permission.
 | 
			
		||||
 *
 | 
			
		||||
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 | 
			
		||||
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 | 
			
		||||
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 | 
			
		||||
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 | 
			
		||||
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 | 
			
		||||
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 | 
			
		||||
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 | 
			
		||||
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 | 
			
		||||
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 | 
			
		||||
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 | 
			
		||||
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
/* This example demonstrates how to use the CUDA Direct3D bindings to fill
 | 
			
		||||
 * a vertex buffer with CUDA and use Direct3D to render the data.
 | 
			
		||||
 * Host code.
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
#pragma warning(disable : 4312)
 | 
			
		||||
 | 
			
		||||
#include <windows.h>
 | 
			
		||||
#include <mmsystem.h>
 | 
			
		||||
 | 
			
		||||
// this header includes all the necessary D3D10 includes
 | 
			
		||||
#include <dynlink_d3d10.h>
 | 
			
		||||
#include <cuda_runtime_api.h>
 | 
			
		||||
#include <cuda_d3d10_interop.h>
 | 
			
		||||
 | 
			
		||||
// includes, project
 | 
			
		||||
#include <rendercheck_d3d10.h>
 | 
			
		||||
#include <helper_cuda.h>
 | 
			
		||||
#include <helper_functions.h>
 | 
			
		||||
 | 
			
		||||
int g_iFrameToCompare = 10;
 | 
			
		||||
 | 
			
		||||
bool g_bDone = false;
 | 
			
		||||
bool g_bPassed = true;
 | 
			
		||||
 | 
			
		||||
int *pArgc = NULL;
 | 
			
		||||
char **pArgv = NULL;
 | 
			
		||||
 | 
			
		||||
#define MAX_EPSILON 10
 | 
			
		||||
 | 
			
		||||
static char *SDK_name = "simpleD3D10RenderTarget";
 | 
			
		||||
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
// Global variables
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
IDXGIAdapter *g_pCudaCapableAdapter = NULL;  // Adapter to use
 | 
			
		||||
ID3D10Device *g_pd3dDevice = NULL;           // Our rendering device
 | 
			
		||||
IDXGISwapChain *g_pSwapChain = NULL;         // The swap chain of the window
 | 
			
		||||
ID3D10RenderTargetView *g_pSwapChainRTV =
 | 
			
		||||
    NULL;  // The Render target view on the swap chain ( used for clear)
 | 
			
		||||
ID3D10RasterizerState *g_pRasterState = NULL;
 | 
			
		||||
 | 
			
		||||
struct Color {
 | 
			
		||||
  ID3D10Texture2D *pBuffer;  // The color buffer
 | 
			
		||||
  ID3D10RenderTargetView
 | 
			
		||||
      *pBufferRTV;  // The Render target view on the color buffer
 | 
			
		||||
  ID3D10ShaderResourceView
 | 
			
		||||
      *pBufferSRV;  // The shader resource view on the color buffer
 | 
			
		||||
  cudaGraphicsResource *cudaResource;  // resource of the Buffer on cuda side
 | 
			
		||||
  int pitch;
 | 
			
		||||
  cudaArray *pCudaArray;  // the data in a cuda view
 | 
			
		||||
} g_color;
 | 
			
		||||
 | 
			
		||||
struct Histogram {
 | 
			
		||||
  ID3D10Buffer *pBuffer;                 // Buffer to hold histogram
 | 
			
		||||
  ID3D10ShaderResourceView *pBufferSRV;  // View on the histogram buffer
 | 
			
		||||
  cudaGraphicsResource *cudaResource;    // resource of the Buffer on cuda side
 | 
			
		||||
  unsigned int *cudaBuffer;  // staging buffer to allow cuda to write results
 | 
			
		||||
  // cudaArray*                    pCudaArray; // the data in a cuda view
 | 
			
		||||
  size_t size;
 | 
			
		||||
} g_histogram;
 | 
			
		||||
 | 
			
		||||
ID3D10Effect *g_pDisplayEffect = NULL;
 | 
			
		||||
ID3D10EffectTechnique *g_pDisplayTechnique = NULL;
 | 
			
		||||
ID3D10EffectScalarVariable *g_pTime = NULL;
 | 
			
		||||
 | 
			
		||||
static const char g_displayEffectSrc[] =
 | 
			
		||||
    "float   g_Time; \n"
 | 
			
		||||
    "uint2   g_vGrid = uint2(20,20); \n"
 | 
			
		||||
    "float4 g_vGridSize = float4(0.05f, 0.05f, 0.046f, 0.046f); \n"
 | 
			
		||||
    "\n"
 | 
			
		||||
    "struct Fragment{ \n"
 | 
			
		||||
    "    float4 Pos : SV_POSITION;\n"
 | 
			
		||||
    "    float2 Tex : TEXCOORD0; \n"
 | 
			
		||||
    "    float4 Col : TEXCOORD1; };\n"
 | 
			
		||||
    "\n"
 | 
			
		||||
    "Fragment VS( uint instanceId : SV_InstanceID, uint vertexId : SV_VertexID "
 | 
			
		||||
    ")\n"
 | 
			
		||||
    "{\n"
 | 
			
		||||
    "    Fragment f;\n"
 | 
			
		||||
    "    f.Tex = float2( 1.f*((vertexId == 1) || (vertexId == 3)), 1.f*( "
 | 
			
		||||
    "vertexId >= 2)); \n"
 | 
			
		||||
    "    \n"
 | 
			
		||||
    "    uint2 cellId = uint2(instanceId % g_vGrid.x, instanceId / "
 | 
			
		||||
    "g_vGrid.x);\n"
 | 
			
		||||
    "    f.Pos = float4( g_vGridSize.xy*cellId + 0.5f*(g_vGridSize.xy - "
 | 
			
		||||
    "g_vGridSize.zw) + f.Tex * g_vGridSize.zw, 0, 1);\n"
 | 
			
		||||
    "    f.Pos.xy = (f.Pos.xy*2.f - 1.f);\n"
 | 
			
		||||
    "    \n"
 | 
			
		||||
    "    f.Col = float4( ((g_vGrid.x-1.f) - cellId.x) / (g_vGrid.x-1.f), "
 | 
			
		||||
    "(cellId.x + (g_vGrid.y-1.f) - cellId.y) / (g_vGrid.x+g_vGrid.y-1.f), "
 | 
			
		||||
    "cellId.y / (g_vGrid.y-1.f), 1.f);\n"
 | 
			
		||||
    "    f.Col *= float4( 0.5 + 0.5*sin(g_Time), 0.5 + "
 | 
			
		||||
    "0.5*sin(g_Time)*cos(g_Time), 0.5 + 0.5*cos(g_Time), 1.f);\n"
 | 
			
		||||
    "    return f;\n"
 | 
			
		||||
    "}\n"
 | 
			
		||||
    "\n"
 | 
			
		||||
    "float4 PS( Fragment f ) : SV_Target\n"
 | 
			
		||||
    "{\n"
 | 
			
		||||
    "    return f.Col;\n"
 | 
			
		||||
    "}\n"
 | 
			
		||||
    "\n"
 | 
			
		||||
    "technique10 Render\n"
 | 
			
		||||
    "{\n"
 | 
			
		||||
    "    pass P0\n"
 | 
			
		||||
    "    {\n"
 | 
			
		||||
    "        SetVertexShader( CompileShader( vs_4_0, VS() ) );\n"
 | 
			
		||||
    "        SetGeometryShader( NULL );\n"
 | 
			
		||||
    "        SetPixelShader( CompileShader( ps_4_0, PS() ) );\n"
 | 
			
		||||
    "    }\n"
 | 
			
		||||
    "}\n"
 | 
			
		||||
    "\n";
 | 
			
		||||
 | 
			
		||||
ID3D10Effect *g_pCompositeEffect = NULL;
 | 
			
		||||
ID3D10EffectTechnique *g_pCompositeTechnique = NULL;
 | 
			
		||||
ID3D10EffectVectorVariable *g_pvQuadRect = NULL;
 | 
			
		||||
ID3D10EffectScalarVariable *g_pUseCase = NULL;
 | 
			
		||||
ID3D10EffectShaderResourceVariable *g_pTexture2D = NULL;
 | 
			
		||||
ID3D10EffectShaderResourceVariable *g_pHistogram = NULL;
 | 
			
		||||
 | 
			
		||||
static const char g_compositeEffectSrc[] =
 | 
			
		||||
    "float4 g_vQuadRect; \n"
 | 
			
		||||
    "int g_UseCase; \n"
 | 
			
		||||
    "Texture2D g_Texture2D; \n"
 | 
			
		||||
    "Buffer<uint> g_Histogram; \n"
 | 
			
		||||
    "\n"
 | 
			
		||||
    "SamplerState samLinear{ \n"
 | 
			
		||||
    "    Filter = MIN_MAG_LINEAR_MIP_POINT; \n"
 | 
			
		||||
    "};\n"
 | 
			
		||||
    "\n"
 | 
			
		||||
    "struct Fragment{ \n"
 | 
			
		||||
    "    float4 Pos : SV_POSITION;\n"
 | 
			
		||||
    "    float3 Tex : TEXCOORD0; \n"
 | 
			
		||||
    "    float2 uv : TEXCOORD1; };\n"
 | 
			
		||||
    "\n"
 | 
			
		||||
    "Fragment VS( uint vertexId : SV_VertexID )\n"
 | 
			
		||||
    "{\n"
 | 
			
		||||
    "    Fragment f;\n"
 | 
			
		||||
    "    f.Tex = float3( 1.f*((vertexId == 1) || (vertexId == 3)), 1.f*( "
 | 
			
		||||
    "vertexId >= 2), 0.f); \n"
 | 
			
		||||
    "    \n"
 | 
			
		||||
    "    f.Pos = float4( g_vQuadRect.xy + f.Tex * g_vQuadRect.zw, 0, 1);\n"
 | 
			
		||||
    "    \n"
 | 
			
		||||
    "    f.uv = float2( f.Tex.x*255.f, f.Tex.y*50000.f ); \n"
 | 
			
		||||
    "    return f;\n"
 | 
			
		||||
    "}\n"
 | 
			
		||||
    "\n"
 | 
			
		||||
    "float4 PS( Fragment f ) : SV_Target\n"
 | 
			
		||||
    "{\n"
 | 
			
		||||
    "    if (g_UseCase == 0) \n"
 | 
			
		||||
    "        return g_Texture2D.Sample( samLinear, f.Tex.xy ); \n"
 | 
			
		||||
    "    else if (g_UseCase == 1) { \n"
 | 
			
		||||
    "        uint index = f.uv.x; \n"
 | 
			
		||||
    "        float value = g_Histogram.Load( index ); \n"
 | 
			
		||||
    "        //float value = index * 1000; \n"
 | 
			
		||||
    "        float red = ( value >= f.uv.y ? (0.5f * f.uv.y / value) + 0.5f : "
 | 
			
		||||
    "0.f ); \n"
 | 
			
		||||
    "        return float4(red, 0, 0, 1); \n"
 | 
			
		||||
    "    } else return float4(f.Tex, 1);\n"
 | 
			
		||||
    "}\n"
 | 
			
		||||
    "\n"
 | 
			
		||||
    "technique10 Render\n"
 | 
			
		||||
    "{\n"
 | 
			
		||||
    "    pass P0\n"
 | 
			
		||||
    "    {\n"
 | 
			
		||||
    "        SetVertexShader( CompileShader( vs_4_0, VS() ) );\n"
 | 
			
		||||
    "        SetGeometryShader( NULL );\n"
 | 
			
		||||
    "        SetPixelShader( CompileShader( ps_4_0, PS() ) );\n"
 | 
			
		||||
    "    }\n"
 | 
			
		||||
    "}\n"
 | 
			
		||||
    "\n";
 | 
			
		||||
 | 
			
		||||
// testing/tracing function used pervasively in tests.  if the condition is
 | 
			
		||||
// unsatisfied then spew and fail the function immediately (doing no cleanup)
 | 
			
		||||
#define AssertOrQuit(x)                                                  \
 | 
			
		||||
  if (!(x)) {                                                            \
 | 
			
		||||
    fprintf(stdout, "Assert unsatisfied in %s at %s:%d\n", __FUNCTION__, \
 | 
			
		||||
            __FILE__, __LINE__);                                         \
 | 
			
		||||
    return 1;                                                            \
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
const unsigned int g_WindowWidth = 800;
 | 
			
		||||
const unsigned int g_WindowHeight = 800;
 | 
			
		||||
const unsigned int g_HistogramSize = 256;
 | 
			
		||||
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
// Forward declarations
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
void runTest(int argc, char **argv, char *ref_file);
 | 
			
		||||
void runCuda();
 | 
			
		||||
HRESULT InitD3D(HWND hWnd);
 | 
			
		||||
VOID Cleanup();
 | 
			
		||||
VOID Render();
 | 
			
		||||
LRESULT WINAPI MsgProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam);
 | 
			
		||||
 | 
			
		||||
// CUDA/D3D10 kernels
 | 
			
		||||
extern "C" void checkCudaError();
 | 
			
		||||
extern "C" void createHistogramTex(unsigned int *histogram, unsigned int width,
 | 
			
		||||
                                   unsigned int height, cudaArray *colorArray);
 | 
			
		||||
 | 
			
		||||
#define NAME_LEN 512
 | 
			
		||||
 | 
			
		||||
bool findCUDADevice() {
 | 
			
		||||
  int nGraphicsGPU = 0;
 | 
			
		||||
  int deviceCount = 0;
 | 
			
		||||
  bool bFoundGraphics = false;
 | 
			
		||||
  char devname[NAME_LEN];
 | 
			
		||||
 | 
			
		||||
  // This function call returns 0 if there are no CUDA capable devices.
 | 
			
		||||
  cudaError_t error_id = cudaGetDeviceCount(&deviceCount);
 | 
			
		||||
 | 
			
		||||
  if (error_id != cudaSuccess) {
 | 
			
		||||
    printf("cudaGetDeviceCount returned %d\n-> %s\n", (int)error_id,
 | 
			
		||||
           cudaGetErrorString(error_id));
 | 
			
		||||
    exit(EXIT_FAILURE);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (deviceCount == 0) {
 | 
			
		||||
    printf("> There are no device(s) supporting CUDA\n");
 | 
			
		||||
    return false;
 | 
			
		||||
  } else {
 | 
			
		||||
    printf("> Found %d CUDA Capable Device(s)\n", deviceCount);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Get CUDA device properties
 | 
			
		||||
  cudaDeviceProp deviceProp;
 | 
			
		||||
 | 
			
		||||
  for (int dev = 0; dev < deviceCount; ++dev) {
 | 
			
		||||
    cudaGetDeviceProperties(&deviceProp, dev);
 | 
			
		||||
    STRCPY(devname, NAME_LEN, deviceProp.name);
 | 
			
		||||
    printf("> GPU %d: %s\n", dev, devname);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  return true;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool findDXDevice(char *dev_name) {
 | 
			
		||||
  HRESULT hr = S_OK;
 | 
			
		||||
  cudaError cuStatus;
 | 
			
		||||
 | 
			
		||||
  // Iterate through the candidate adapters
 | 
			
		||||
  IDXGIFactory *pFactory;
 | 
			
		||||
  hr = sFnPtr_CreateDXGIFactory(__uuidof(IDXGIFactory), (void **)(&pFactory));
 | 
			
		||||
 | 
			
		||||
  if (!SUCCEEDED(hr)) {
 | 
			
		||||
    printf("> No DXGI Factory created.\n");
 | 
			
		||||
    return false;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  UINT adapter = 0;
 | 
			
		||||
 | 
			
		||||
  for (; !g_pCudaCapableAdapter; ++adapter) {
 | 
			
		||||
    // Get a candidate DXGI adapter
 | 
			
		||||
    IDXGIAdapter *pAdapter = NULL;
 | 
			
		||||
    hr = pFactory->EnumAdapters(adapter, &pAdapter);
 | 
			
		||||
 | 
			
		||||
    if (FAILED(hr)) {
 | 
			
		||||
      break;  // no compatible adapters found
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // Query to see if there exists a corresponding compute device
 | 
			
		||||
    int cuDevice;
 | 
			
		||||
    cuStatus = cudaD3D10GetDevice(&cuDevice, pAdapter);
 | 
			
		||||
    printLastCudaError("cudaD3D10GetDevice failed");  // This prints and resets
 | 
			
		||||
                                                      // the cudaError to
 | 
			
		||||
                                                      // cudaSuccess
 | 
			
		||||
 | 
			
		||||
    if (cudaSuccess == cuStatus) {
 | 
			
		||||
      // If so, mark it as the one against which to create our d3d10 device
 | 
			
		||||
      g_pCudaCapableAdapter = pAdapter;
 | 
			
		||||
      g_pCudaCapableAdapter->AddRef();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    pAdapter->Release();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  printf("> Found %d D3D10 Adapater(s).\n", (int)adapter);
 | 
			
		||||
 | 
			
		||||
  pFactory->Release();
 | 
			
		||||
 | 
			
		||||
  if (!g_pCudaCapableAdapter) {
 | 
			
		||||
    printf("> Found 0 D3D10 Adapater(s) /w Compute capability.\n");
 | 
			
		||||
    return false;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  DXGI_ADAPTER_DESC adapterDesc;
 | 
			
		||||
  g_pCudaCapableAdapter->GetDesc(&adapterDesc);
 | 
			
		||||
  wcstombs(dev_name, adapterDesc.Description, 128);
 | 
			
		||||
 | 
			
		||||
  printf("> Found 1 D3D10 Adapater(s) /w Compute capability.\n");
 | 
			
		||||
  printf("> %s\n", dev_name);
 | 
			
		||||
 | 
			
		||||
  return true;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Program main
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
int main(int argc, char **argv) {
 | 
			
		||||
  char device_name[256];
 | 
			
		||||
  char *ref_file = NULL;
 | 
			
		||||
 | 
			
		||||
  pArgc = &argc;
 | 
			
		||||
  pArgv = argv;
 | 
			
		||||
 | 
			
		||||
  printf("[%s] - Starting...\n", SDK_name);
 | 
			
		||||
 | 
			
		||||
  if (!findCUDADevice())  // Search for CUDA GPU
 | 
			
		||||
  {
 | 
			
		||||
    printf("> CUDA Device NOT found on \"%s\".. Exiting.\n", device_name);
 | 
			
		||||
    exit(EXIT_SUCCESS);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (!dynlinkLoadD3D10API())  // Search for D3D API (locate drivers, does not
 | 
			
		||||
                               // mean device is found)
 | 
			
		||||
  {
 | 
			
		||||
    printf("> D3D10 API libraries NOT found.. Exiting.\n");
 | 
			
		||||
    dynlinkUnloadD3D10API();
 | 
			
		||||
    exit(EXIT_SUCCESS);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (!findDXDevice(device_name))  // Search for D3D Hardware Device
 | 
			
		||||
  {
 | 
			
		||||
    printf("> D3D10 Graphics Device NOT found.. Exiting.\n");
 | 
			
		||||
    dynlinkUnloadD3D10API();
 | 
			
		||||
    exit(EXIT_SUCCESS);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // command line options
 | 
			
		||||
  if (argc > 1) {
 | 
			
		||||
    // automatied build testing harness
 | 
			
		||||
    if (checkCmdLineFlag(argc, (const char **)argv, "file"))
 | 
			
		||||
      getCmdLineArgumentString(argc, (const char **)argv, "file", &ref_file);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // run D3D10/CUDA test
 | 
			
		||||
  runTest(argc, argv, ref_file);
 | 
			
		||||
 | 
			
		||||
  //
 | 
			
		||||
  // and exit
 | 
			
		||||
  //
 | 
			
		||||
  printf("%s running on %s exiting...\n", SDK_name, device_name);
 | 
			
		||||
 | 
			
		||||
  exit(g_bPassed ? EXIT_SUCCESS : EXIT_FAILURE);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
//! Run a simple test for CUDA
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
void runTest(int argc, char **argv, char *ref_file) {
 | 
			
		||||
  // Register the window class
 | 
			
		||||
  WNDCLASSEX wc = {sizeof(WNDCLASSEX),
 | 
			
		||||
                   CS_CLASSDC,
 | 
			
		||||
                   MsgProc,
 | 
			
		||||
                   0L,
 | 
			
		||||
                   0L,
 | 
			
		||||
                   GetModuleHandle(NULL),
 | 
			
		||||
                   NULL,
 | 
			
		||||
                   NULL,
 | 
			
		||||
                   NULL,
 | 
			
		||||
                   NULL,
 | 
			
		||||
                   "CUDA SDK",
 | 
			
		||||
                   NULL};
 | 
			
		||||
  RegisterClassEx(&wc);
 | 
			
		||||
 | 
			
		||||
  // Create the application's window
 | 
			
		||||
  HWND hWnd = CreateWindow(wc.lpszClassName, "CUDA/D3D10 RenderTarget InterOP",
 | 
			
		||||
                           WS_OVERLAPPEDWINDOW, 100, 100, g_WindowWidth,
 | 
			
		||||
                           g_WindowHeight, NULL, NULL, wc.hInstance, NULL);
 | 
			
		||||
 | 
			
		||||
  // Initialize Direct3D
 | 
			
		||||
  if (SUCCEEDED(InitD3D(hWnd))) {
 | 
			
		||||
    // Initialize interoperability between CUDA and Direct3D
 | 
			
		||||
    // Register vertex buffer with CUDA
 | 
			
		||||
    cudaGraphicsD3D10RegisterResource(&g_histogram.cudaResource,
 | 
			
		||||
                                      g_histogram.pBuffer,
 | 
			
		||||
                                      cudaGraphicsMapFlagsNone);
 | 
			
		||||
    getLastCudaError("cudaGraphicsD3D10RegisterResource (g_pHistogram) failed");
 | 
			
		||||
 | 
			
		||||
    // Register color buffer with CUDA
 | 
			
		||||
    cudaGraphicsD3D10RegisterResource(&g_color.cudaResource, g_color.pBuffer,
 | 
			
		||||
                                      cudaGraphicsMapFlagsNone);
 | 
			
		||||
    getLastCudaError(
 | 
			
		||||
        "cudaGraphicsD3D10RegisterResource (g_color.pBuffer) failed");
 | 
			
		||||
 | 
			
		||||
    // Show the window
 | 
			
		||||
    ShowWindow(hWnd, SW_SHOWDEFAULT);
 | 
			
		||||
    UpdateWindow(hWnd);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  //
 | 
			
		||||
  // The main loop
 | 
			
		||||
  //
 | 
			
		||||
  while (false == g_bDone) {
 | 
			
		||||
    Render();
 | 
			
		||||
 | 
			
		||||
    //
 | 
			
		||||
    // handle I/O
 | 
			
		||||
    //
 | 
			
		||||
    MSG msg;
 | 
			
		||||
    ZeroMemory(&msg, sizeof(msg));
 | 
			
		||||
 | 
			
		||||
    while (msg.message != WM_QUIT) {
 | 
			
		||||
      if (PeekMessage(&msg, NULL, 0U, 0U, PM_REMOVE)) {
 | 
			
		||||
        TranslateMessage(&msg);
 | 
			
		||||
        DispatchMessage(&msg);
 | 
			
		||||
      } else {
 | 
			
		||||
        Render();
 | 
			
		||||
 | 
			
		||||
        if (ref_file) {
 | 
			
		||||
          for (int count = 0; count < g_iFrameToCompare; count++) {
 | 
			
		||||
            Render();
 | 
			
		||||
          }
 | 
			
		||||
 | 
			
		||||
          const char *cur_image_path = "simpleD3D10RenderTarget.ppm";
 | 
			
		||||
 | 
			
		||||
          // Save a reference of our current test run image
 | 
			
		||||
          CheckRenderD3D10::ActiveRenderTargetToPPM(g_pd3dDevice,
 | 
			
		||||
                                                    cur_image_path);
 | 
			
		||||
 | 
			
		||||
          // compare to offical reference image, printing PASS or FAIL.
 | 
			
		||||
          g_bPassed = CheckRenderD3D10::PPMvsPPM(cur_image_path, ref_file,
 | 
			
		||||
                                                 argv[0], MAX_EPSILON, 0.15f);
 | 
			
		||||
 | 
			
		||||
          g_bDone = true;
 | 
			
		||||
 | 
			
		||||
          Cleanup();
 | 
			
		||||
 | 
			
		||||
          PostQuitMessage(0);
 | 
			
		||||
        } else {
 | 
			
		||||
          g_bPassed = true;
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Release D3D Library (after message loop)
 | 
			
		||||
  dynlinkUnloadD3D10API();
 | 
			
		||||
 | 
			
		||||
  // Unregister windows class
 | 
			
		||||
  UnregisterClass(wc.lpszClassName, wc.hInstance);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
//! Run the Cuda part of the computation
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
void runCuda() {
 | 
			
		||||
  cudaStream_t stream = 0;
 | 
			
		||||
  const int nbResources = 2;
 | 
			
		||||
  cudaGraphicsResource *ppResources[nbResources] = {
 | 
			
		||||
      g_histogram.cudaResource, g_color.cudaResource,
 | 
			
		||||
  };
 | 
			
		||||
  // Map resources for Cuda
 | 
			
		||||
  checkCudaErrors(cudaGraphicsMapResources(nbResources, ppResources, stream));
 | 
			
		||||
  getLastCudaError("cudaGraphicsMapResources(2) failed");
 | 
			
		||||
  // Get pointers
 | 
			
		||||
  checkCudaErrors(cudaGraphicsResourceGetMappedPointer(
 | 
			
		||||
      (void **)&g_histogram.cudaBuffer, &g_histogram.size,
 | 
			
		||||
      g_histogram.cudaResource));
 | 
			
		||||
  getLastCudaError(
 | 
			
		||||
      "cudaGraphicsResourceGetMappedPointer (g_color.pBuffer) failed");
 | 
			
		||||
  cudaGraphicsSubResourceGetMappedArray(&g_color.pCudaArray,
 | 
			
		||||
                                        g_color.cudaResource, 0, 0);
 | 
			
		||||
  getLastCudaError(
 | 
			
		||||
      "cudaGraphicsSubResourceGetMappedArray (g_color.pBuffer) failed");
 | 
			
		||||
 | 
			
		||||
  // Execute kernel
 | 
			
		||||
  createHistogramTex(g_histogram.cudaBuffer, g_WindowWidth, g_WindowHeight,
 | 
			
		||||
                     g_color.pCudaArray);
 | 
			
		||||
  checkCudaError();
 | 
			
		||||
  //
 | 
			
		||||
  // unmap the resources
 | 
			
		||||
  //
 | 
			
		||||
  checkCudaErrors(cudaGraphicsUnmapResources(nbResources, ppResources, stream));
 | 
			
		||||
  getLastCudaError("cudaGraphicsUnmapResources(2) failed");
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
// Name: InitD3D()
 | 
			
		||||
// Desc: Initializes Direct3D
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
HRESULT InitD3D(HWND hWnd) {
 | 
			
		||||
  // Set up the structure used to create the device and swapchain
 | 
			
		||||
  DXGI_SWAP_CHAIN_DESC sd;
 | 
			
		||||
  ZeroMemory(&sd, sizeof(sd));
 | 
			
		||||
  sd.BufferCount = 1;
 | 
			
		||||
  sd.BufferDesc.Width = g_WindowWidth;
 | 
			
		||||
  sd.BufferDesc.Height = g_WindowHeight;
 | 
			
		||||
  sd.BufferDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
 | 
			
		||||
  sd.BufferDesc.RefreshRate.Numerator = 60;
 | 
			
		||||
  sd.BufferDesc.RefreshRate.Denominator = 1;
 | 
			
		||||
  sd.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT;
 | 
			
		||||
  sd.OutputWindow = hWnd;
 | 
			
		||||
  sd.SampleDesc.Count = 1;
 | 
			
		||||
  sd.SampleDesc.Quality = 0;
 | 
			
		||||
  sd.Windowed = TRUE;
 | 
			
		||||
 | 
			
		||||
  // Create device and swapchain
 | 
			
		||||
  HRESULT hr = sFnPtr_D3D10CreateDeviceAndSwapChain(
 | 
			
		||||
      g_pCudaCapableAdapter, D3D10_DRIVER_TYPE_HARDWARE, NULL,
 | 
			
		||||
      0,  // D3D10_CREATE_DEVICE_DEBUG,
 | 
			
		||||
      D3D10_SDK_VERSION, &sd, &g_pSwapChain, &g_pd3dDevice);
 | 
			
		||||
  AssertOrQuit(SUCCEEDED(hr));
 | 
			
		||||
 | 
			
		||||
  g_pCudaCapableAdapter->Release();
 | 
			
		||||
 | 
			
		||||
  // Create a render target view of the swapchain
 | 
			
		||||
  ID3D10Texture2D *pBuffer;
 | 
			
		||||
  hr =
 | 
			
		||||
      g_pSwapChain->GetBuffer(0, __uuidof(ID3D10Texture2D), (LPVOID *)&pBuffer);
 | 
			
		||||
  AssertOrQuit(SUCCEEDED(hr));
 | 
			
		||||
 | 
			
		||||
  hr = g_pd3dDevice->CreateRenderTargetView(pBuffer, NULL, &g_pSwapChainRTV);
 | 
			
		||||
  AssertOrQuit(SUCCEEDED(hr));
 | 
			
		||||
  pBuffer->Release();
 | 
			
		||||
 | 
			
		||||
  // Create a color buffer, corresponding render target view and shader resource
 | 
			
		||||
  // view
 | 
			
		||||
  D3D10_TEXTURE2D_DESC tex2Ddesc;
 | 
			
		||||
  ZeroMemory(&tex2Ddesc, sizeof(D3D10_TEXTURE2D_DESC));
 | 
			
		||||
  tex2Ddesc.Width = g_WindowWidth;
 | 
			
		||||
  tex2Ddesc.Height = g_WindowHeight;
 | 
			
		||||
  tex2Ddesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
 | 
			
		||||
  tex2Ddesc.MipLevels = 1;
 | 
			
		||||
  tex2Ddesc.ArraySize = 1;
 | 
			
		||||
  tex2Ddesc.SampleDesc.Count = 1;
 | 
			
		||||
  tex2Ddesc.Usage = D3D10_USAGE_DEFAULT;
 | 
			
		||||
  tex2Ddesc.BindFlags = D3D10_BIND_RENDER_TARGET | D3D10_BIND_SHADER_RESOURCE;
 | 
			
		||||
 | 
			
		||||
  hr = g_pd3dDevice->CreateTexture2D(&tex2Ddesc, NULL, &g_color.pBuffer);
 | 
			
		||||
  AssertOrQuit(SUCCEEDED(hr));
 | 
			
		||||
 | 
			
		||||
  hr = g_pd3dDevice->CreateShaderResourceView(g_color.pBuffer, NULL,
 | 
			
		||||
                                              &g_color.pBufferSRV);
 | 
			
		||||
  AssertOrQuit(SUCCEEDED(hr));
 | 
			
		||||
 | 
			
		||||
  hr = g_pd3dDevice->CreateRenderTargetView(g_color.pBuffer, NULL,
 | 
			
		||||
                                            &g_color.pBufferRTV);
 | 
			
		||||
  AssertOrQuit(SUCCEEDED(hr));
 | 
			
		||||
 | 
			
		||||
  // Create a buffer which will contain the resulting histogram and the SRV to
 | 
			
		||||
  // plug it
 | 
			
		||||
  D3D10_BUFFER_DESC bufferDesc;
 | 
			
		||||
  bufferDesc.Usage = D3D10_USAGE_DEFAULT;
 | 
			
		||||
  // NOTE: allocation of more than what is needed to display in the shader
 | 
			
		||||
  // but this 64 factor is required for CUDA to work with this buffer (see
 | 
			
		||||
  // BLOCK_N in .cu code...)
 | 
			
		||||
  bufferDesc.ByteWidth =
 | 
			
		||||
      sizeof(unsigned int) * g_HistogramSize * 64 /*BLOCK_N*/;
 | 
			
		||||
  bufferDesc.BindFlags = D3D10_BIND_SHADER_RESOURCE;
 | 
			
		||||
  bufferDesc.CPUAccessFlags = 0;
 | 
			
		||||
  bufferDesc.MiscFlags = 0;
 | 
			
		||||
  // useless values... we could remove this...
 | 
			
		||||
  unsigned int values[256 * 64];
 | 
			
		||||
 | 
			
		||||
  for (int i = 0; i < 256 * 64; i++) {
 | 
			
		||||
    values[i] = i;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  D3D10_SUBRESOURCE_DATA data;
 | 
			
		||||
  data.pSysMem = values;
 | 
			
		||||
  data.SysMemPitch = 0;
 | 
			
		||||
  data.SysMemSlicePitch = 0;
 | 
			
		||||
 | 
			
		||||
  hr = g_pd3dDevice->CreateBuffer(&bufferDesc, &data, &g_histogram.pBuffer);
 | 
			
		||||
  AssertOrQuit(SUCCEEDED(hr));
 | 
			
		||||
 | 
			
		||||
  D3D10_SHADER_RESOURCE_VIEW_DESC bufferSRVDesc;
 | 
			
		||||
  bufferSRVDesc.Format = DXGI_FORMAT_R32_UINT;
 | 
			
		||||
  bufferSRVDesc.ViewDimension = D3D10_SRV_DIMENSION_BUFFER;
 | 
			
		||||
  bufferSRVDesc.Buffer.ElementOffset = 0;
 | 
			
		||||
  bufferSRVDesc.Buffer.ElementWidth =
 | 
			
		||||
      g_HistogramSize;  // 4*sizeof(unsigned int);
 | 
			
		||||
 | 
			
		||||
  hr = g_pd3dDevice->CreateShaderResourceView(
 | 
			
		||||
      g_histogram.pBuffer, &bufferSRVDesc, &g_histogram.pBufferSRV);
 | 
			
		||||
  AssertOrQuit(SUCCEEDED(hr));
 | 
			
		||||
  // Create the equivalent as a cuda staging buffer that we'll use to write from
 | 
			
		||||
  // Cuda. Then we'll copy it to the texture
 | 
			
		||||
  // cudaMalloc(g_histogram.cudaBuffer, sizeof(float) * g_HistogramSize;
 | 
			
		||||
  // getLastCudaError("cudaMallocPitch (g_histogram) failed");
 | 
			
		||||
 | 
			
		||||
  // Setup the viewport
 | 
			
		||||
  D3D10_VIEWPORT vp;
 | 
			
		||||
  vp.Width = g_WindowWidth;
 | 
			
		||||
  vp.Height = g_WindowHeight;
 | 
			
		||||
  vp.MinDepth = 0.0f;
 | 
			
		||||
  vp.MaxDepth = 1.0f;
 | 
			
		||||
  vp.TopLeftX = 0;
 | 
			
		||||
  vp.TopLeftY = 0;
 | 
			
		||||
  g_pd3dDevice->RSSetViewports(1, &vp);
 | 
			
		||||
 | 
			
		||||
  // Setup the effect
 | 
			
		||||
  {
 | 
			
		||||
    ID3D10Blob *pErrors = NULL;
 | 
			
		||||
    ID3D10Blob *pCompiledEffect;
 | 
			
		||||
    hr = sFnPtr_D3D10CompileEffectFromMemory((void *)g_displayEffectSrc,
 | 
			
		||||
                                             sizeof(g_displayEffectSrc), NULL,
 | 
			
		||||
                                             NULL,  // pDefines
 | 
			
		||||
                                             NULL,  // pIncludes
 | 
			
		||||
                                             0,     // HLSL flags
 | 
			
		||||
                                             0,     // FXFlags
 | 
			
		||||
                                             &pCompiledEffect, &pErrors);
 | 
			
		||||
 | 
			
		||||
    if (pErrors) {
 | 
			
		||||
      LPVOID l_pError = NULL;
 | 
			
		||||
      l_pError = pErrors->GetBufferPointer();  // then cast to a char* to see it
 | 
			
		||||
                                               // in the locals window
 | 
			
		||||
      fprintf(stdout, "Compilation error: \n %s", (char *)l_pError);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    AssertOrQuit(SUCCEEDED(hr));
 | 
			
		||||
 | 
			
		||||
    hr = sFnPtr_D3D10CreateEffectFromMemory(
 | 
			
		||||
        pCompiledEffect->GetBufferPointer(), pCompiledEffect->GetBufferSize(),
 | 
			
		||||
        0,  // FXFlags
 | 
			
		||||
        g_pd3dDevice, NULL, &g_pDisplayEffect);
 | 
			
		||||
    pCompiledEffect->Release();
 | 
			
		||||
 | 
			
		||||
    g_pDisplayTechnique = g_pDisplayEffect->GetTechniqueByName("Render");
 | 
			
		||||
 | 
			
		||||
    g_pTime = g_pDisplayEffect->GetVariableByName("g_Time")->AsScalar();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Setup the effect
 | 
			
		||||
  {
 | 
			
		||||
    ID3D10Blob *pCompiledEffect;
 | 
			
		||||
    ID3D10Blob *pErrors = NULL;
 | 
			
		||||
    hr = sFnPtr_D3D10CompileEffectFromMemory((void *)g_compositeEffectSrc,
 | 
			
		||||
                                             sizeof(g_compositeEffectSrc), NULL,
 | 
			
		||||
                                             NULL,  // pDefines
 | 
			
		||||
                                             NULL,  // pIncludes
 | 
			
		||||
                                             0,     // HLSL flags
 | 
			
		||||
                                             0,     // FXFlags
 | 
			
		||||
                                             &pCompiledEffect, &pErrors);
 | 
			
		||||
 | 
			
		||||
    if (pErrors) {
 | 
			
		||||
      LPVOID l_pError = NULL;
 | 
			
		||||
      l_pError = pErrors->GetBufferPointer();  // then cast to a char* to see it
 | 
			
		||||
                                               // in the locals window
 | 
			
		||||
      fprintf(stdout, "Compilation error: \n %s", (char *)l_pError);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    AssertOrQuit(SUCCEEDED(hr));
 | 
			
		||||
 | 
			
		||||
    hr = sFnPtr_D3D10CreateEffectFromMemory(
 | 
			
		||||
        pCompiledEffect->GetBufferPointer(), pCompiledEffect->GetBufferSize(),
 | 
			
		||||
        0,  // FXFlags
 | 
			
		||||
        g_pd3dDevice, NULL, &g_pCompositeEffect);
 | 
			
		||||
    pCompiledEffect->Release();
 | 
			
		||||
 | 
			
		||||
    g_pCompositeTechnique = g_pCompositeEffect->GetTechniqueByName("Render");
 | 
			
		||||
 | 
			
		||||
    g_pvQuadRect =
 | 
			
		||||
        g_pCompositeEffect->GetVariableByName("g_vQuadRect")->AsVector();
 | 
			
		||||
    g_pUseCase = g_pCompositeEffect->GetVariableByName("g_UseCase")->AsScalar();
 | 
			
		||||
 | 
			
		||||
    g_pTexture2D = g_pCompositeEffect->GetVariableByName("g_Texture2D")
 | 
			
		||||
                       ->AsShaderResource();
 | 
			
		||||
    g_pTexture2D->SetResource(g_color.pBufferSRV);
 | 
			
		||||
 | 
			
		||||
    g_pHistogram = g_pCompositeEffect->GetVariableByName("g_Histogram")
 | 
			
		||||
                       ->AsShaderResource();
 | 
			
		||||
    g_pHistogram->SetResource(g_histogram.pBufferSRV);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  D3D10_RASTERIZER_DESC rasterizerState;
 | 
			
		||||
  rasterizerState.FillMode = D3D10_FILL_SOLID;
 | 
			
		||||
  rasterizerState.CullMode = D3D10_CULL_FRONT;
 | 
			
		||||
  rasterizerState.FrontCounterClockwise = false;
 | 
			
		||||
  rasterizerState.DepthBias = false;
 | 
			
		||||
  rasterizerState.DepthBiasClamp = 0;
 | 
			
		||||
  rasterizerState.SlopeScaledDepthBias = 0;
 | 
			
		||||
  rasterizerState.DepthClipEnable = false;
 | 
			
		||||
  rasterizerState.ScissorEnable = false;
 | 
			
		||||
  rasterizerState.MultisampleEnable = false;
 | 
			
		||||
  rasterizerState.AntialiasedLineEnable = false;
 | 
			
		||||
  g_pd3dDevice->CreateRasterizerState(&rasterizerState, &g_pRasterState);
 | 
			
		||||
  g_pd3dDevice->RSSetState(g_pRasterState);
 | 
			
		||||
 | 
			
		||||
  return S_OK;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
// Name: Cleanup()
 | 
			
		||||
// Desc: Releases all previously initialized objects
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
VOID Cleanup() {
 | 
			
		||||
  if (g_histogram.pBuffer != NULL) {
 | 
			
		||||
    // Unregister vertex buffer
 | 
			
		||||
    cudaGraphicsUnregisterResource(g_histogram.cudaResource);
 | 
			
		||||
    getLastCudaError("cudaGraphicsUnregisterResource failed");
 | 
			
		||||
    g_histogram.pBuffer->Release();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (g_histogram.pBufferSRV != NULL) {
 | 
			
		||||
    g_histogram.pBufferSRV->Release();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (g_pDisplayEffect != NULL) {
 | 
			
		||||
    g_pDisplayEffect->Release();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (g_pCompositeEffect != NULL) {
 | 
			
		||||
    g_pCompositeEffect->Release();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (g_color.pBufferSRV != NULL) {
 | 
			
		||||
    g_color.pBufferSRV->Release();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (g_color.pBufferRTV != NULL) {
 | 
			
		||||
    g_color.pBufferRTV->Release();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (g_color.pBuffer != NULL) {
 | 
			
		||||
    // Unregister vertex buffer
 | 
			
		||||
    cudaGraphicsUnregisterResource(g_color.cudaResource);
 | 
			
		||||
    getLastCudaError("cudaD3D10UnregisterResource failed");
 | 
			
		||||
    g_color.pBuffer->Release();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (g_pRasterState != NULL) {
 | 
			
		||||
    g_pRasterState->Release();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (g_pSwapChainRTV != NULL) {
 | 
			
		||||
    g_pSwapChainRTV->Release();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (g_pSwapChain != NULL) {
 | 
			
		||||
    g_pSwapChain->Release();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (g_pd3dDevice != NULL) {
 | 
			
		||||
    g_pd3dDevice->Release();
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
// Name: Render()
 | 
			
		||||
// Desc: Draws the scene
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
VOID Render() {
 | 
			
		||||
  g_pd3dDevice->RSSetState(g_pRasterState);
 | 
			
		||||
 | 
			
		||||
  // Draw frame
 | 
			
		||||
  {
 | 
			
		||||
    static float time = 0.f;
 | 
			
		||||
    time += 0.001f;
 | 
			
		||||
    g_pTime->SetFloat(time);
 | 
			
		||||
 | 
			
		||||
    // Clear the Color to a black color
 | 
			
		||||
    float ClearColor[4] = {0.f, 0.1f, 0.1f, 1.f};
 | 
			
		||||
    g_pd3dDevice->ClearRenderTargetView(g_color.pBufferRTV, ClearColor);
 | 
			
		||||
    g_pd3dDevice->OMSetRenderTargets(1, &g_color.pBufferRTV, NULL);
 | 
			
		||||
 | 
			
		||||
    g_pd3dDevice->IASetInputLayout(0);
 | 
			
		||||
    g_pd3dDevice->IASetPrimitiveTopology(
 | 
			
		||||
        D3D10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
 | 
			
		||||
 | 
			
		||||
    g_pDisplayTechnique->GetPassByIndex(0)->Apply(0);
 | 
			
		||||
    g_pd3dDevice->DrawInstanced(4, 400, 0, 0);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Run CUDA to compute the histogram
 | 
			
		||||
  runCuda();
 | 
			
		||||
 | 
			
		||||
  // draw the 2d texture
 | 
			
		||||
  {
 | 
			
		||||
    // Clear the Color to a black color
 | 
			
		||||
    float ClearColor[4] = {0, 0, 0, 1.f};
 | 
			
		||||
    g_pd3dDevice->ClearRenderTargetView(g_pSwapChainRTV, ClearColor);
 | 
			
		||||
    g_pd3dDevice->OMSetRenderTargets(1, &g_pSwapChainRTV, NULL);
 | 
			
		||||
 | 
			
		||||
    g_pd3dDevice->IASetInputLayout(0);
 | 
			
		||||
    g_pd3dDevice->IASetPrimitiveTopology(
 | 
			
		||||
        D3D10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
 | 
			
		||||
 | 
			
		||||
    g_pTexture2D->SetResource(g_color.pBufferSRV);
 | 
			
		||||
    g_pHistogram->SetResource(g_histogram.pBufferSRV);
 | 
			
		||||
 | 
			
		||||
    g_pUseCase->SetInt(0);
 | 
			
		||||
    float quadRect1[4] = {-1.0f, -0.8f, 2.0f, 1.8f};
 | 
			
		||||
    g_pvQuadRect->SetFloatVector((float *)&quadRect1);
 | 
			
		||||
 | 
			
		||||
    g_pCompositeTechnique->GetPassByIndex(0)->Apply(0);
 | 
			
		||||
    g_pd3dDevice->Draw(4, 0);
 | 
			
		||||
 | 
			
		||||
    g_pUseCase->SetInt(1);
 | 
			
		||||
    float quadRect2[4] = {-0.8f, -0.99f, 1.6f, 0.19f};
 | 
			
		||||
    g_pvQuadRect->SetFloatVector((float *)&quadRect2);
 | 
			
		||||
 | 
			
		||||
    g_pCompositeTechnique->GetPassByIndex(0)->Apply(0);
 | 
			
		||||
    g_pd3dDevice->Draw(4, 0);
 | 
			
		||||
 | 
			
		||||
    g_pTexture2D->SetResource(NULL);
 | 
			
		||||
    g_pHistogram->SetResource(NULL);
 | 
			
		||||
    g_pCompositeTechnique->GetPassByIndex(0)->Apply(0);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Present the backbuffer contents to the display
 | 
			
		||||
  g_pSwapChain->Present(0, 0);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
// Name: MsgProc()
 | 
			
		||||
// Desc: The window's message handler
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
LRESULT WINAPI MsgProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam) {
 | 
			
		||||
  switch (msg) {
 | 
			
		||||
    case WM_DESTROY:
 | 
			
		||||
    case WM_KEYDOWN:
 | 
			
		||||
      if (msg != WM_KEYDOWN || wParam == 27) {
 | 
			
		||||
        g_bDone = true;
 | 
			
		||||
        Cleanup();
 | 
			
		||||
        PostQuitMessage(0);
 | 
			
		||||
        return 0;
 | 
			
		||||
      }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  return DefWindowProc(hWnd, msg, wParam, lParam);
 | 
			
		||||
}
 | 
			
		||||
@ -1,223 +0,0 @@
 | 
			
		||||
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
 | 
			
		||||
 *
 | 
			
		||||
 * Redistribution and use in source and binary forms, with or without
 | 
			
		||||
 * modification, are permitted provided that the following conditions
 | 
			
		||||
 * are met:
 | 
			
		||||
 *  * Redistributions of source code must retain the above copyright
 | 
			
		||||
 *    notice, this list of conditions and the following disclaimer.
 | 
			
		||||
 *  * Redistributions in binary form must reproduce the above copyright
 | 
			
		||||
 *    notice, this list of conditions and the following disclaimer in the
 | 
			
		||||
 *    documentation and/or other materials provided with the distribution.
 | 
			
		||||
 *  * Neither the name of NVIDIA CORPORATION nor the names of its
 | 
			
		||||
 *    contributors may be used to endorse or promote products derived
 | 
			
		||||
 *    from this software without specific prior written permission.
 | 
			
		||||
 *
 | 
			
		||||
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 | 
			
		||||
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 | 
			
		||||
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 | 
			
		||||
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 | 
			
		||||
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 | 
			
		||||
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 | 
			
		||||
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 | 
			
		||||
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 | 
			
		||||
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 | 
			
		||||
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 | 
			
		||||
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
/* This example demonstrates how to use the CUDA Direct3D bindings with the
 | 
			
		||||
 * runtime API.
 | 
			
		||||
 * Device code.
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
#ifndef SIMPLED3D10RENDERTARGET_KERNEL_CU
 | 
			
		||||
#define SIMPLED3D10RENDERTARGET_KERNEL_CU
 | 
			
		||||
 | 
			
		||||
// includes, C string library
 | 
			
		||||
#include <stdio.h>
 | 
			
		||||
#include <stdlib.h>
 | 
			
		||||
#include <string.h>
 | 
			
		||||
 | 
			
		||||
// includes, cuda
 | 
			
		||||
#include <cuda.h>
 | 
			
		||||
#include <builtin_types.h>
 | 
			
		||||
#include <cuda_runtime_api.h>
 | 
			
		||||
 | 
			
		||||
// includes, project
 | 
			
		||||
#include <helper_cuda.h>  // includes cuda.h and cuda_runtime_api.h
 | 
			
		||||
//#include "checkCudaErrors"
 | 
			
		||||
 | 
			
		||||
// Number of histogram bins (one per possible 8-bit channel value)
#define BIN_COUNT 256
// Size in bytes of one BIN_COUNT-bin histogram of unsigned int counters
#define HISTOGRAM_SIZE (BIN_COUNT * sizeof(unsigned int))

////////////////////////////////////////////////////////////////////////////////
// GPU-specific definitions
////////////////////////////////////////////////////////////////////////////////
// 24-bit integer multiply intrinsic. The kernels below no longer use it (they
// use ordinary integer multiplication); the macro is kept so that any other
// code relying on it keeps compiling.
#define IMUL(a, b) __mul24(a, b)

// log2 of the warp size (32 threads)
#define WARP_LOG2SIZE 5

// Warps in thread block for histogramTex256Kernel()
#define WARP_N 6

// Corresponding thread block size in threads for histogramTex256Kernel()
#define THREAD_N (WARP_N << WARP_LOG2SIZE)

// Total histogram size (in counters) per thread block for
// histogramTex256Kernel(): one BIN_COUNT-bin sub-histogram per warp
#define BLOCK_MEMORY (WARP_N * BIN_COUNT)

// Thread block count for histogramTex256Kernel()
#define BLOCK_N 64

////////////////////////////////////////////////////////////////////////////////
// Adds one to bin `data` of the sub-histogram starting at s_WarpHist.
//
// The increment is an atomicAdd. The previous implementation tagged the
// counter with the thread ID and retried until its own write "stuck", which
// is only correct when all threads of a warp execute in lockstep; with
// independent thread scheduling (Volta and newer) two threads can both read
// the same old value and an increment is lost.
//
// threadTag is kept for source compatibility and is ignored. Counters no
// longer carry tag bits in their upper WARP_LOG2SIZE bits.
////////////////////////////////////////////////////////////////////////////////
__device__ inline void addData256(volatile unsigned int *s_WarpHist,
                                  unsigned int data, unsigned int threadTag) {
  (void)threadTag;
  // atomicAdd has no overload for volatile pointers; the atomic itself
  // provides the required memory access semantics.
  atomicAdd(const_cast<unsigned int *>(s_WarpHist) + data, 1U);
}

////////////////////////////////////////////////////////////////////////////////
// Main histogram calculation kernel
//
// Launch layout: 1D grid, 1D blocks. blockDim.x must not exceed THREAD_N
// (each warp of the block owns one BIN_COUNT-bin slice of the BLOCK_MEMORY
// counters held in static shared memory).
//
// d_Result : device memory, at least gridDim.x * BIN_COUNT counters; block b
//            writes its partial histogram to
//            d_Result[b * BIN_COUNT .. b * BIN_COUNT + BIN_COUNT - 1].
// colorTex : texture object over the uchar4 color image, read with
//            unnormalized integer texel coordinates.
// width    : image width in texels, used to turn a linear index into (x, y).
// height   : image height in texels (currently unused by the kernel).
// dataN    : number of texels to process, in row-major order starting at
//            texel (0, 0). Every texel adds four samples (x, y, z, w).
////////////////////////////////////////////////////////////////////////////////
static __global__ void histogramTex256Kernel(unsigned int *d_Result,
                                             cudaTextureObject_t colorTex,
                                             unsigned int width,
                                             unsigned int height, int dataN) {
  (void)height;

  // Current global thread index
  const int globalTid = blockIdx.x * blockDim.x + threadIdx.x;
  // Total number of threads in the compute grid
  const int numThreads = blockDim.x * gridDim.x;

  // Shared memory storage: one sub-histogram per warp
  __shared__ unsigned int s_Hist[BLOCK_MEMORY];

  // Sub-histogram of the warp this thread belongs to
  unsigned int *s_WarpHist =
      s_Hist + (threadIdx.x >> WARP_LOG2SIZE) * BIN_COUNT;

  // Clear shared memory buffer for current thread block before processing
  for (int pos = threadIdx.x; pos < BLOCK_MEMORY; pos += blockDim.x)
    s_Hist[pos] = 0;

  __syncthreads();

  // Cycle through the data set, update the sub-histogram of each warp
  for (int pos = globalTid; pos < dataN; pos += numThreads) {
    // Linear texel index -> 2D texel coordinate
    const int py = pos / width;
    const int px = pos - (py * width);
    const uchar4 data4 = tex2D<uchar4>(colorTex, (float)px, (float)py);

    addData256(s_WarpHist, data4.x, 0U);
    addData256(s_WarpHist, data4.y, 0U);
    addData256(s_WarpHist, data4.z, 0U);
    addData256(s_WarpHist, data4.w, 0U);
  }

  __syncthreads();

  // Merge per-warp histograms into per-block and write to global memory
  for (int pos = threadIdx.x; pos < BIN_COUNT; pos += blockDim.x) {
    unsigned int sum = 0;

    for (int base = 0; base < BLOCK_MEMORY; base += BIN_COUNT)
      sum += s_Hist[base + pos];

    d_Result[blockIdx.x * BIN_COUNT + pos] = sum;
  }
}

///////////////////////////////////////////////////////////////////////////////
// Merge BLOCK_N subhistograms of BIN_COUNT bins into final histogram
///////////////////////////////////////////////////////////////////////////////
// gridDim.x   == BIN_COUNT
// blockDim.x  == BLOCK_N
// blockIdx.x  == bin counter processed by current block
// threadIdx.x == subhistogram index
//
// The merge is done in place: block b sums d_Result[t * BIN_COUNT + b] over
// all t and stores the total in d_Result[b]. Element b is read only by
// thread 0 of block b, before that block overwrites it, so blocks do not
// interfere with each other.
static __global__ void mergeHistogramTex256Kernel(unsigned int *d_Result) {
  __shared__ unsigned int data[BLOCK_N];

  // Reads are uncoalesced, but this final stage takes
  // only a fraction of total processing time
  data[threadIdx.x] = d_Result[threadIdx.x * BIN_COUNT + blockIdx.x];

  // Tree reduction in shared memory; the barrier at the top of each step
  // makes the previous step's writes visible
  for (int stride = BLOCK_N / 2; stride > 0; stride >>= 1) {
    __syncthreads();

    if (threadIdx.x < stride) data[threadIdx.x] += data[threadIdx.x + stride];
  }

  if (threadIdx.x == 0) d_Result[blockIdx.x] = data[0];
}

////////////////////////////////////////////////////////////////////////////////
// Host interface to GPU histogram
////////////////////////////////////////////////////////////////////////////////

// Reads (and clears) the last CUDA runtime error; on failure prints its
// description to stderr and terminates the process with exit code 2.
extern "C" void checkCudaError() {
  cudaError_t err = cudaGetLastError();

  if (cudaSuccess != err) {
    fprintf(stderr, "Cuda error: %s.\n", cudaGetErrorString(err));
    exit(2);
  }
}

// Computes the 256-bin histogram of the color image held in colorArray.
//
// h_Result   : despite the h_ prefix this pointer is passed straight to the
//              kernels, so it must be device-accessible memory with room for
//              BLOCK_N * BIN_COUNT counters. On return the first BIN_COUNT
//              counters hold the final histogram.
// width      : image width in texels.
// height     : image height in texels.
// colorArray : CUDA array with the uchar4 color image.
//
// A texture object is created over colorArray for the duration of the call
// (point filtering, clamped, unnormalized coordinates, raw element reads).
// The function waits for the kernels to finish before it destroys the texture
// object. Any CUDA failure terminates the process.
extern "C" void createHistogramTex(unsigned int *h_Result, unsigned int width,
                                   unsigned int height, cudaArray *colorArray) {
  cudaResourceDesc resDesc;
  memset(&resDesc, 0, sizeof(resDesc));
  resDesc.resType = cudaResourceTypeArray;
  resDesc.res.array.array = colorArray;

  cudaTextureDesc texDesc;
  memset(&texDesc, 0, sizeof(texDesc));
  texDesc.addressMode[0] = cudaAddressModeClamp;
  texDesc.addressMode[1] = cudaAddressModeClamp;
  texDesc.filterMode = cudaFilterModePoint;
  texDesc.readMode = cudaReadModeElementType;
  texDesc.normalizedCoords = 0;

  cudaTextureObject_t colorTex = 0;
  checkCudaErrors(cudaCreateTextureObject(&colorTex, &resDesc, &texDesc, NULL));

  // NOTE(review): only width * height / 4 texels are sampled, i.e. the first
  // quarter of the image in row-major order (each texel contributes four
  // channel samples). This matches the previous behaviour; confirm whether
  // the full image (width * height texels) was intended.
  histogramTex256Kernel<<<BLOCK_N, THREAD_N>>>(h_Result, colorTex, width,
                                               height, width * height / 4);
  checkCudaError();

  mergeHistogramTex256Kernel<<<BIN_COUNT, BLOCK_N>>>(h_Result);
  checkCudaError();

  // The kernels run asynchronously; wait for them before releasing the
  // texture object they read from. This also surfaces execution errors.
  checkCudaErrors(cudaDeviceSynchronize());
  checkCudaErrors(cudaDestroyTextureObject(colorTex));
}

// Intentionally empty: the texture is created inside createHistogramTex().
// Kept so that existing callers continue to link.
extern "C" void bindArrayToTexture(cudaArray *pArray) {}
 | 
			
		||||
 | 
			
		||||
#endif  // #ifndef SIMPLED3D10RENDERTARGET_KERNEL_CU
 | 
			
		||||
@ -1,46 +0,0 @@
 | 
			
		||||
################################################################################
 | 
			
		||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
 | 
			
		||||
#
 | 
			
		||||
# Redistribution and use in source and binary forms, with or without
 | 
			
		||||
# modification, are permitted provided that the following conditions
 | 
			
		||||
# are met:
 | 
			
		||||
#  * Redistributions of source code must retain the above copyright
 | 
			
		||||
#    notice, this list of conditions and the following disclaimer.
 | 
			
		||||
#  * Redistributions in binary form must reproduce the above copyright
 | 
			
		||||
#    notice, this list of conditions and the following disclaimer in the
 | 
			
		||||
#    documentation and/or other materials provided with the distribution.
 | 
			
		||||
#  * Neither the name of NVIDIA CORPORATION nor the names of its
 | 
			
		||||
#    contributors may be used to endorse or promote products derived
 | 
			
		||||
#    from this software without specific prior written permission.
 | 
			
		||||
#
 | 
			
		||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 | 
			
		||||
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 | 
			
		||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 | 
			
		||||
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 | 
			
		||||
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 | 
			
		||||
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 | 
			
		||||
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 | 
			
		||||
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 | 
			
		||||
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 | 
			
		||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 | 
			
		||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
			
		||||
#
 | 
			
		||||
################################################################################
 | 
			
		||||
#
 | 
			
		||||
# Makefile project is only supported on Mac OS X and Linux platforms
 | 
			
		||||
#
 | 
			
		||||
################################################################################
 | 
			
		||||
 | 
			
		||||
# Target rules
#
# This sample is waived on this platform: every target is a no-op and
# `build` only prints a notice. None of the targets produces a file of the
# same name, so they are declared phony; otherwise a stray file called
# e.g. `build` or `clean` would make the corresponding target look up to date.
.PHONY: all build run testrun clean clobber

all: build

build:
	$(info >>> WARNING - simpleD3D10Texture is not supported on Linux - waiving sample <<<)

run: build

testrun: build

clean:

clobber: clean
 | 
			
		||||
@ -1,49 +0,0 @@
 | 
			
		||||
# simpleD3D10Texture - Simple D3D10 Texture
 | 
			
		||||
 | 
			
		||||
## Description
 | 
			
		||||
 | 
			
		||||
Simple program which demonstrates how to interoperate CUDA with Direct3D10 Texture.  The program creates a number of D3D10 Textures (2D, 3D, and CubeMap) which are generated from CUDA kernels. Direct3D then renders the results on the screen.  A Direct3D10 Capable device is required.
 | 
			
		||||
 | 
			
		||||
## Key Concepts
 | 
			
		||||
 | 
			
		||||
Graphics Interop, Texture
 | 
			
		||||
 | 
			
		||||
## Supported SM Architectures
 | 
			
		||||
 | 
			
		||||
[SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.9 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 | 
			
		||||
 | 
			
		||||
## Supported OSes
 | 
			
		||||
 | 
			
		||||
Windows
 | 
			
		||||
 | 
			
		||||
## Supported CPU Architecture
 | 
			
		||||
 | 
			
		||||
x86_64
 | 
			
		||||
 | 
			
		||||
## CUDA APIs involved
 | 
			
		||||
 | 
			
		||||
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
 | 
			
		||||
cudaGraphicsUnmapResources, cudaMalloc, cudaMallocPitch, cudaGetErrorString, cudaFree, cudaGetLastError, cudaGraphicsMapResources, cudaGetDeviceCount, cudaMemset, cudaGraphicsUnregisterResource, cudaGraphicsSubResourceGetMappedArray, cudaGetDeviceProperties
 | 
			
		||||
 | 
			
		||||
## Dependencies needed to build/run
 | 
			
		||||
[DirectX](../../../README.md#directx)
 | 
			
		||||
 | 
			
		||||
## Prerequisites
 | 
			
		||||
 | 
			
		||||
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 | 
			
		||||
Make sure the dependencies mentioned in the [Dependencies](#dependencies-needed-to-buildrun) section above are installed.
 | 
			
		||||
 | 
			
		||||
## Build and Run
 | 
			
		||||
 | 
			
		||||
### Windows
 | 
			
		||||
The Windows samples are built using the Visual Studio IDE. Solution files (.sln) are provided for each supported version of Visual Studio, using the format:
 | 
			
		||||
```
 | 
			
		||||
*_vs<version>.sln - for Visual Studio <version>
 | 
			
		||||
```
 | 
			
		||||
Each individual sample has its own set of solution files in its directory:
 | 
			
		||||
 | 
			
		||||
To build/examine all the samples at once, the complete solution files should be used. To build/examine a single sample, the individual sample solution files should be used.
 | 
			
		||||
> **Note:** Some samples require that the Microsoft DirectX SDK (June 2010 or newer) be installed and that the VC++ directory paths are properly set up (**Tools > Options...**). Check the DirectX Dependencies section for details.
 | 
			
		||||
 | 
			
		||||
## References (for more details)
 | 
			
		||||
 | 
			
		||||
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							@ -1,970 +0,0 @@
 | 
			
		||||
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
 | 
			
		||||
 *
 | 
			
		||||
 * Redistribution and use in source and binary forms, with or without
 | 
			
		||||
 * modification, are permitted provided that the following conditions
 | 
			
		||||
 * are met:
 | 
			
		||||
 *  * Redistributions of source code must retain the above copyright
 | 
			
		||||
 *    notice, this list of conditions and the following disclaimer.
 | 
			
		||||
 *  * Redistributions in binary form must reproduce the above copyright
 | 
			
		||||
 *    notice, this list of conditions and the following disclaimer in the
 | 
			
		||||
 *    documentation and/or other materials provided with the distribution.
 | 
			
		||||
 *  * Neither the name of NVIDIA CORPORATION nor the names of its
 | 
			
		||||
 *    contributors may be used to endorse or promote products derived
 | 
			
		||||
 *    from this software without specific prior written permission.
 | 
			
		||||
 *
 | 
			
		||||
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 | 
			
		||||
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 | 
			
		||||
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 | 
			
		||||
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 | 
			
		||||
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 | 
			
		||||
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 | 
			
		||||
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 | 
			
		||||
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 | 
			
		||||
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 | 
			
		||||
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 | 
			
		||||
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
/* This example demonstrates how to use the CUDA Direct3D bindings to
 | 
			
		||||
 * transfer data between CUDA and D3D10 2D, CubeMap, and Volume Textures.
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
#pragma warning(disable : 4312)
 | 
			
		||||
 | 
			
		||||
#include <windows.h>
 | 
			
		||||
#include <mmsystem.h>
 | 
			
		||||
 | 
			
		||||
// This header includes all the necessary D3D10 and CUDA includes
 | 
			
		||||
#include <dynlink_d3d10.h>
 | 
			
		||||
#include <cuda_runtime_api.h>
 | 
			
		||||
#include <cuda_d3d10_interop.h>
 | 
			
		||||
 | 
			
		||||
// includes, project
 | 
			
		||||
#include <rendercheck_d3d10.h>
 | 
			
		||||
#include <helper_cuda.h>  // helper functions for CUDA error checking and initialization
 | 
			
		||||
 | 
			
		||||
// Tolerance constant; its use is outside this excerpt
// (presumably the rendered-frame comparison - TODO confirm).
#define MAX_EPSILON 10

// Name of this sample.
// NOTE(review): this binds a string literal to a non-const char *, which
// standard C++ no longer allows (some compilers accept it as an extension).
// Consider `const char *` once all uses have been checked.
static char *SDK_name = "simpleD3D10Texture";

//-----------------------------------------------------------------------------
// Global variables
//-----------------------------------------------------------------------------
// All handles start out as NULL and are filled in during initialization
// (initialization code is outside this excerpt).
IDXGIAdapter *g_pCudaCapableAdapter = NULL;  // Adapter to use
ID3D10Device *g_pd3dDevice = NULL;           // Our rendering device
IDXGISwapChain *g_pSwapChain = NULL;         // The swap chain of the window
ID3D10RenderTargetView *g_pSwapChainRTV =
    NULL;  // The Render target view on the swap chain ( used for clear)
ID3D10RasterizerState *g_pRasterState = NULL;  // Rasterizer state object

ID3D10InputLayout *g_pInputLayout = NULL;
// Effect object and handles into it. The variable names mirror the globals
// declared in the HLSL source g_simpleEffectSrc (g_vQuadRect, g_UseCase,
// g_Texture2D, g_Texture3D, g_TextureCube) and its technique "Render".
ID3D10Effect *g_pSimpleEffect = NULL;
ID3D10EffectTechnique *g_pSimpleTechnique = NULL;
ID3D10EffectVectorVariable *g_pvQuadRect = NULL;
ID3D10EffectScalarVariable *g_pUseCase = NULL;
ID3D10EffectShaderResourceVariable *g_pTexture2D = NULL;
ID3D10EffectShaderResourceVariable *g_pTexture3D = NULL;
ID3D10EffectShaderResourceVariable *g_pTextureCube = NULL;
 | 
			
		||||
 | 
			
		||||
// HLSL effect source, kept as a string so it can be compiled at run time.
//
// Globals:   g_vQuadRect (xy = quad origin, zw = quad extent), g_UseCase
//            (selects which texture is sampled) and one Texture2D, Texture3D
//            and TextureCube.
// VS:        takes no vertex data; the quad corner is derived from
//            SV_VertexID (0..3). Tex holds the corner in [0,1]; for use case 1
//            a z coordinate of 0, 0.5, 0.5 or 1 is added, for use cases >= 2
//            Tex.xy is remapped to [-1,1].
// PS:        use case 0 samples the 2D texture, 1 the 3D texture, 2..7 the
//            cube map along +z, -z, +x, -x, +y, -y respectively; any other
//            value outputs the texture coordinate as a color.
// Technique: "Render", a single pass P0 with vs_4_0 / ps_4_0 and no geometry
//            shader.
static const char g_simpleEffectSrc[] =
    "float4 g_vQuadRect; \n"
    "int g_UseCase; \n"
    "Texture2D g_Texture2D; \n"
    "Texture3D g_Texture3D; \n"
    "TextureCube g_TextureCube; \n"
    "\n"
    "SamplerState samLinear{ \n"
    "    Filter = MIN_MAG_LINEAR_MIP_POINT; \n"
    "};\n"
    "\n"
    "struct Fragment{ \n"
    "    float4 Pos : SV_POSITION;\n"
    "    float3 Tex : TEXCOORD0; };\n"
    "\n"
    "Fragment VS( uint vertexId : SV_VertexID )\n"
    "{\n"
    "    Fragment f;\n"
    "    f.Tex = float3( 0.f, 0.f, 0.f); \n"
    "    if (vertexId == 1) f.Tex.x = 1.f; \n"
    "    else if (vertexId == 2) f.Tex.y = 1.f; \n"
    "    else if (vertexId == 3) f.Tex.xy = float2(1.f, 1.f); \n"
    "    \n"
    "    f.Pos = float4( g_vQuadRect.xy + f.Tex * g_vQuadRect.zw, 0, 1);\n"
    "    \n"
    "    if (g_UseCase == 1) { \n"
    "        if (vertexId == 1) f.Tex.z = 0.5f; \n"
    "        else if (vertexId == 2) f.Tex.z = 0.5f; \n"
    "        else if (vertexId == 3) f.Tex.z = 1.f; \n"
    "    } \n"
    "    else if (g_UseCase >= 2) { \n"
    "        f.Tex.xy = f.Tex.xy * 2.f - 1.f; \n"
    "    } \n"
    "    return f;\n"
    "}\n"
    "\n"
    "float4 PS( Fragment f ) : SV_Target\n"
    "{\n"
    "    if (g_UseCase == 0) return g_Texture2D.Sample( samLinear, f.Tex.xy ); "
    "\n"
    "    else if (g_UseCase == 1) return g_Texture3D.Sample( samLinear, f.Tex "
    "); \n"
    "    else if (g_UseCase == 2) return g_TextureCube.Sample( samLinear, "
    "float3(f.Tex.xy, 1.0) ); \n"
    "    else if (g_UseCase == 3) return g_TextureCube.Sample( samLinear, "
    "float3(f.Tex.xy, -1.0) ); \n"
    "    else if (g_UseCase == 4) return g_TextureCube.Sample( samLinear, "
    "float3(1.0, f.Tex.xy) ); \n"
    "    else if (g_UseCase == 5) return g_TextureCube.Sample( samLinear, "
    "float3(-1.0, f.Tex.xy) ); \n"
    "    else if (g_UseCase == 6) return g_TextureCube.Sample( samLinear, "
    "float3(f.Tex.x, 1.0, f.Tex.y) ); \n"
    "    else if (g_UseCase == 7) return g_TextureCube.Sample( samLinear, "
    "float3(f.Tex.x, -1.0, f.Tex.y) ); \n"
    "    else return float4(f.Tex, 1);\n"
    "}\n"
    "\n"
    "technique10 Render\n"
    "{\n"
    "    pass P0\n"
    "    {\n"
    "        SetVertexShader( CompileShader( vs_4_0, VS() ) );\n"
    "        SetGeometryShader( NULL );\n"
    "        SetPixelShader( CompileShader( ps_4_0, PS() ) );\n"
    "    }\n"
    "}\n"
    "\n";
 | 
			
		||||
 | 
			
		||||
// Testing/tracing macro used pervasively in tests. If the condition is
// unsatisfied, it prints the enclosing function, file and line to stdout and
// makes the enclosing function return 1 immediately (doing no cleanup).
//
// It may therefore only be used inside functions whose return type accepts
// the value 1. The body is wrapped in do { ... } while (0) so the macro
// expands to exactly one statement: `AssertOrQuit(x);` is safe as the
// unbraced body of an if/else and cannot capture a following `else`.
#define AssertOrQuit(x)                                                    \
  do {                                                                     \
    if (!(x)) {                                                            \
      fprintf(stdout, "Assert unsatisfied in %s at %s:%d\n", __FUNCTION__, \
              __FILE__, __LINE__);                                         \
      return 1;                                                            \
    }                                                                      \
  } while (0)
 | 
			
		||||
 | 
			
		||||
// Main-loop state: main() keeps rendering while g_bDone is false, and
// g_bPassed selects the process exit status.
bool g_bDone = false;
bool g_bPassed = true;

// Client-area size requested for the window; also used for the swap chain
// buffers and the viewport.
const unsigned int g_WindowWidth = 720u;
const unsigned int g_WindowHeight = 720u;

// Number of additional frames rendered before the render target is captured
// for comparison against a reference image.
int g_iFrameToCompare = 10;

// Pointers to main()'s argc / argv, filled in at the top of main().
int *pArgc = NULL;
char **pArgv = NULL;
 | 
			
		||||
 | 
			
		||||
// Data structure for 2D texture shared between DX10 and CUDA
 | 
			
		||||
struct {
 | 
			
		||||
  ID3D10Texture2D *pTexture;
 | 
			
		||||
  ID3D10ShaderResourceView *pSRView;
 | 
			
		||||
  cudaGraphicsResource *cudaResource;
 | 
			
		||||
  void *cudaLinearMemory;
 | 
			
		||||
  size_t pitch;
 | 
			
		||||
  int width;
 | 
			
		||||
  int height;
 | 
			
		||||
} g_texture_2d;
 | 
			
		||||
 | 
			
		||||
// Data structure for volume textures shared between DX10 and CUDA
 | 
			
		||||
struct {
 | 
			
		||||
  ID3D10Texture3D *pTexture;
 | 
			
		||||
  ID3D10ShaderResourceView *pSRView;
 | 
			
		||||
  cudaGraphicsResource *cudaResource;
 | 
			
		||||
  void *cudaLinearMemory;
 | 
			
		||||
  size_t pitch;
 | 
			
		||||
  int width;
 | 
			
		||||
  int height;
 | 
			
		||||
  int depth;
 | 
			
		||||
} g_texture_3d;
 | 
			
		||||
 | 
			
		||||
// Data structure for cube texture shared between DX10 and CUDA
 | 
			
		||||
struct {
 | 
			
		||||
  ID3D10Texture2D *pTexture;
 | 
			
		||||
  ID3D10ShaderResourceView *pSRView;
 | 
			
		||||
  cudaGraphicsResource *cudaResource;
 | 
			
		||||
  void *cudaLinearMemory;
 | 
			
		||||
  size_t pitch;
 | 
			
		||||
  int size;
 | 
			
		||||
} g_texture_cube;
 | 
			
		||||
 | 
			
		||||
// The CUDA kernel launchers that get called
 | 
			
		||||
extern "C" {
 | 
			
		||||
bool cuda_texture_2d(void *surface, size_t width, size_t height, size_t pitch,
 | 
			
		||||
                     float t);
 | 
			
		||||
bool cuda_texture_3d(void *surface, int width, int height, int depth,
 | 
			
		||||
                     size_t pitch, size_t pitchslice, float t);
 | 
			
		||||
bool cuda_texture_cube(void *surface, int width, int height, size_t pitch,
 | 
			
		||||
                       int face, float t);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
// Forward declarations
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
HRESULT InitD3D(HWND hWnd);
 | 
			
		||||
HRESULT InitTextures();
 | 
			
		||||
 | 
			
		||||
void RunKernels();
 | 
			
		||||
void DrawScene();
 | 
			
		||||
void Cleanup();
 | 
			
		||||
void Render();
 | 
			
		||||
 | 
			
		||||
LRESULT WINAPI MsgProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam);
 | 
			
		||||
 | 
			
		||||
#define NAME_LEN 512
 | 
			
		||||
 | 
			
		||||
bool findCUDADevice() {
 | 
			
		||||
  int nGraphicsGPU = 0;
 | 
			
		||||
  int deviceCount = 0;
 | 
			
		||||
  bool bFoundGraphics = false;
 | 
			
		||||
  char devname[NAME_LEN];
 | 
			
		||||
 | 
			
		||||
  // This function call returns 0 if there are no CUDA capable devices.
 | 
			
		||||
  cudaError_t error_id = cudaGetDeviceCount(&deviceCount);
 | 
			
		||||
 | 
			
		||||
  if (error_id != cudaSuccess) {
 | 
			
		||||
    printf("cudaGetDeviceCount returned %d\n-> %s\n", (int)error_id,
 | 
			
		||||
           cudaGetErrorString(error_id));
 | 
			
		||||
    exit(EXIT_FAILURE);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (deviceCount == 0) {
 | 
			
		||||
    printf("> There are no device(s) supporting CUDA\n");
 | 
			
		||||
    return false;
 | 
			
		||||
  } else {
 | 
			
		||||
    printf("> Found %d CUDA Capable Device(s)\n", deviceCount);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Get CUDA device properties
 | 
			
		||||
  cudaDeviceProp deviceProp;
 | 
			
		||||
 | 
			
		||||
  for (int dev = 0; dev < deviceCount; ++dev) {
 | 
			
		||||
    cudaGetDeviceProperties(&deviceProp, dev);
 | 
			
		||||
    STRCPY(devname, NAME_LEN, deviceProp.name);
 | 
			
		||||
    printf("> GPU %d: %s\n", dev, devname);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  return true;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool findDXDevice(char *dev_name) {
 | 
			
		||||
  HRESULT hr = S_OK;
 | 
			
		||||
  cudaError cuStatus;
 | 
			
		||||
 | 
			
		||||
  // Iterate through the candidate adapters
 | 
			
		||||
  IDXGIFactory *pFactory;
 | 
			
		||||
  hr = sFnPtr_CreateDXGIFactory(__uuidof(IDXGIFactory), (void **)(&pFactory));
 | 
			
		||||
 | 
			
		||||
  if (!SUCCEEDED(hr)) {
 | 
			
		||||
    printf("> No DXGI Factory created.\n");
 | 
			
		||||
    return false;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  UINT adapter = 0;
 | 
			
		||||
 | 
			
		||||
  for (; !g_pCudaCapableAdapter; ++adapter) {
 | 
			
		||||
    // Get a candidate DXGI adapter
 | 
			
		||||
    IDXGIAdapter *pAdapter = NULL;
 | 
			
		||||
    hr = pFactory->EnumAdapters(adapter, &pAdapter);
 | 
			
		||||
 | 
			
		||||
    if (FAILED(hr)) {
 | 
			
		||||
      break;  // no compatible adapters found
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // Query to see if there exists a corresponding compute device
 | 
			
		||||
    int cuDevice;
 | 
			
		||||
    cuStatus = cudaD3D10GetDevice(&cuDevice, pAdapter);
 | 
			
		||||
    printLastCudaError("cudaD3D10GetDevice failed");  // This prints and resets
 | 
			
		||||
                                                      // the cudaError to
 | 
			
		||||
                                                      // cudaSuccess
 | 
			
		||||
 | 
			
		||||
    if (cudaSuccess == cuStatus) {
 | 
			
		||||
      // If so, mark it as the one against which to create our d3d10 device
 | 
			
		||||
      g_pCudaCapableAdapter = pAdapter;
 | 
			
		||||
      g_pCudaCapableAdapter->AddRef();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    pAdapter->Release();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  printf("> Found %d D3D10 Adapater(s).\n", (int)adapter);
 | 
			
		||||
 | 
			
		||||
  pFactory->Release();
 | 
			
		||||
 | 
			
		||||
  if (!g_pCudaCapableAdapter) {
 | 
			
		||||
    printf("> Found 0 D3D10 Adapater(s) /w Compute capability.\n");
 | 
			
		||||
    return false;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  DXGI_ADAPTER_DESC adapterDesc;
 | 
			
		||||
  g_pCudaCapableAdapter->GetDesc(&adapterDesc);
 | 
			
		||||
  wcstombs_s(NULL, dev_name, 256, adapterDesc.Description, 128);
 | 
			
		||||
 | 
			
		||||
  printf("> Found 1 D3D10 Adapater(s) /w Compute capability.\n");
 | 
			
		||||
  printf("> %s\n", dev_name);
 | 
			
		||||
 | 
			
		||||
  return true;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Program main
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
int main(int argc, char *argv[]) {
 | 
			
		||||
  char device_name[256];
 | 
			
		||||
  char *ref_file = NULL;
 | 
			
		||||
 | 
			
		||||
  pArgc = &argc;
 | 
			
		||||
  pArgv = argv;
 | 
			
		||||
 | 
			
		||||
  printf("[%s] - Starting...\n", SDK_name);
 | 
			
		||||
 | 
			
		||||
  if (!findCUDADevice())  // Search for CUDA GPU
 | 
			
		||||
  {
 | 
			
		||||
    printf("> CUDA Device NOT found on \"%s\".. Exiting.\n", device_name);
 | 
			
		||||
    exit(EXIT_SUCCESS);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Search for D3D API (locate drivers, does not mean device is found)
 | 
			
		||||
  if (!dynlinkLoadD3D10API()) {
 | 
			
		||||
    printf("> D3D10 API libraries NOT found on.. Exiting.\n");
 | 
			
		||||
    dynlinkUnloadD3D10API();
 | 
			
		||||
    exit(EXIT_SUCCESS);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (!findDXDevice(device_name)) {  // Search for D3D Hardware Device
 | 
			
		||||
    printf("> D3D10 Graphics Device NOT found.. Exiting.\n");
 | 
			
		||||
    dynlinkUnloadD3D10API();
 | 
			
		||||
    exit(EXIT_SUCCESS);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // command line options
 | 
			
		||||
  if (argc > 1) {
 | 
			
		||||
    // automatied build testing harness
 | 
			
		||||
    if (checkCmdLineFlag(argc, (const char **)argv, "file"))
 | 
			
		||||
      getCmdLineArgumentString(argc, (const char **)argv, "file", &ref_file);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
//
 | 
			
		||||
// create window
 | 
			
		||||
//
 | 
			
		||||
// Register the window class
 | 
			
		||||
#if 1
 | 
			
		||||
  WNDCLASSEX wc = {sizeof(WNDCLASSEX),
 | 
			
		||||
                   CS_CLASSDC,
 | 
			
		||||
                   MsgProc,
 | 
			
		||||
                   0L,
 | 
			
		||||
                   0L,
 | 
			
		||||
                   GetModuleHandle(NULL),
 | 
			
		||||
                   NULL,
 | 
			
		||||
                   NULL,
 | 
			
		||||
                   NULL,
 | 
			
		||||
                   NULL,
 | 
			
		||||
                   "CUDA SDK",
 | 
			
		||||
                   NULL};
 | 
			
		||||
  RegisterClassEx(&wc);
 | 
			
		||||
 | 
			
		||||
  // Create the application's window
 | 
			
		||||
  int xBorder = ::GetSystemMetrics(SM_CXSIZEFRAME);
 | 
			
		||||
  int yMenu = ::GetSystemMetrics(SM_CYMENU);
 | 
			
		||||
  int yBorder = ::GetSystemMetrics(SM_CYSIZEFRAME);
 | 
			
		||||
  HWND hWnd = CreateWindow(
 | 
			
		||||
      wc.lpszClassName, "CUDA/D3D10 Texture InterOP", WS_OVERLAPPEDWINDOW, 0, 0,
 | 
			
		||||
      g_WindowWidth + 2 * xBorder, g_WindowHeight + 2 * yBorder + yMenu, NULL,
 | 
			
		||||
      NULL, wc.hInstance, NULL);
 | 
			
		||||
#else
 | 
			
		||||
  static WNDCLASSEX wc = {
 | 
			
		||||
      sizeof(WNDCLASSEX),    CS_CLASSDC, MsgProc, 0L,   0L,
 | 
			
		||||
      GetModuleHandle(NULL), NULL,       NULL,    NULL, NULL,
 | 
			
		||||
      "CudaD3D9Tex",         NULL};
 | 
			
		||||
  RegisterClassEx(&wc);
 | 
			
		||||
  HWND hWnd = CreateWindow("CudaD3D9Tex", "CUDA D3D9 Texture Interop",
 | 
			
		||||
                           WS_OVERLAPPEDWINDOW, 0, 0, 800, 320,
 | 
			
		||||
                           GetDesktopWindow(), NULL, wc.hInstance, NULL);
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
  ShowWindow(hWnd, SW_SHOWDEFAULT);
 | 
			
		||||
  UpdateWindow(hWnd);
 | 
			
		||||
 | 
			
		||||
  // Initialize Direct3D
 | 
			
		||||
  if (SUCCEEDED(InitD3D(hWnd)) && SUCCEEDED(InitTextures())) {
 | 
			
		||||
    // 2D
 | 
			
		||||
    // register the Direct3D resources that we'll use
 | 
			
		||||
    // we'll read to and write from g_texture_2d, so don't set any special map
 | 
			
		||||
    // flags for it
 | 
			
		||||
    cudaGraphicsD3D10RegisterResource(&g_texture_2d.cudaResource,
 | 
			
		||||
                                      g_texture_2d.pTexture,
 | 
			
		||||
                                      cudaGraphicsRegisterFlagsNone);
 | 
			
		||||
    getLastCudaError("cudaGraphicsD3D10RegisterResource (g_texture_2d) failed");
 | 
			
		||||
    // cuda cannot write into the texture directly : the texture is seen as a
 | 
			
		||||
    // cudaArray and can only be mapped as a texture
 | 
			
		||||
    // Create a buffer so that cuda can write into it
 | 
			
		||||
    // pixel fmt is DXGI_FORMAT_R32G32B32A32_FLOAT
 | 
			
		||||
    cudaMallocPitch(&g_texture_2d.cudaLinearMemory, &g_texture_2d.pitch,
 | 
			
		||||
                    g_texture_2d.width * sizeof(float) * 4,
 | 
			
		||||
                    g_texture_2d.height);
 | 
			
		||||
    getLastCudaError("cudaMallocPitch (g_texture_2d) failed");
 | 
			
		||||
    cudaMemset(g_texture_2d.cudaLinearMemory, 1,
 | 
			
		||||
               g_texture_2d.pitch * g_texture_2d.height);
 | 
			
		||||
 | 
			
		||||
    // CUBE
 | 
			
		||||
    cudaGraphicsD3D10RegisterResource(&g_texture_cube.cudaResource,
 | 
			
		||||
                                      g_texture_cube.pTexture,
 | 
			
		||||
                                      cudaGraphicsRegisterFlagsNone);
 | 
			
		||||
    getLastCudaError(
 | 
			
		||||
        "cudaGraphicsD3D10RegisterResource (g_texture_cube) failed");
 | 
			
		||||
    // create the buffer. pixel fmt is DXGI_FORMAT_R8G8B8A8_SNORM
 | 
			
		||||
    cudaMallocPitch(&g_texture_cube.cudaLinearMemory, &g_texture_cube.pitch,
 | 
			
		||||
                    g_texture_cube.size * 4, g_texture_cube.size);
 | 
			
		||||
    getLastCudaError("cudaMallocPitch (g_texture_cube) failed");
 | 
			
		||||
    cudaMemset(g_texture_cube.cudaLinearMemory, 1,
 | 
			
		||||
               g_texture_cube.pitch * g_texture_cube.size);
 | 
			
		||||
    getLastCudaError("cudaMemset (g_texture_cube) failed");
 | 
			
		||||
 | 
			
		||||
    // 3D
 | 
			
		||||
    cudaGraphicsD3D10RegisterResource(&g_texture_3d.cudaResource,
 | 
			
		||||
                                      g_texture_3d.pTexture,
 | 
			
		||||
                                      cudaGraphicsRegisterFlagsNone);
 | 
			
		||||
    getLastCudaError("cudaGraphicsD3D10RegisterResource (g_texture_3d) failed");
 | 
			
		||||
    // create the buffer. pixel fmt is DXGI_FORMAT_R8G8B8A8_SNORM
 | 
			
		||||
    // cudaMallocPitch(&g_texture_3d.cudaLinearMemory, &g_texture_3d.pitch,
 | 
			
		||||
    // g_texture_3d.width * 4, g_texture_3d.height * g_texture_3d.depth);
 | 
			
		||||
    cudaMalloc(
 | 
			
		||||
        &g_texture_3d.cudaLinearMemory,
 | 
			
		||||
        g_texture_3d.width * 4 * g_texture_3d.height * g_texture_3d.depth);
 | 
			
		||||
    g_texture_3d.pitch = g_texture_3d.width * 4;
 | 
			
		||||
    getLastCudaError("cudaMallocPitch (g_texture_3d) failed");
 | 
			
		||||
    cudaMemset(g_texture_3d.cudaLinearMemory, 1,
 | 
			
		||||
               g_texture_3d.pitch * g_texture_3d.height * g_texture_3d.depth);
 | 
			
		||||
    getLastCudaError("cudaMemset (g_texture_3d) failed");
 | 
			
		||||
  } else {
 | 
			
		||||
    printf("> WARNING: No D3D10 Device found.\n");
 | 
			
		||||
    g_bPassed = true;
 | 
			
		||||
    exit(g_bPassed ? EXIT_SUCCESS : EXIT_FAILURE);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  //
 | 
			
		||||
  // the main loop
 | 
			
		||||
  //
 | 
			
		||||
  while (false == g_bDone) {
 | 
			
		||||
    Render();
 | 
			
		||||
 | 
			
		||||
    //
 | 
			
		||||
    // handle I/O
 | 
			
		||||
    //
 | 
			
		||||
    MSG msg;
 | 
			
		||||
    ZeroMemory(&msg, sizeof(msg));
 | 
			
		||||
 | 
			
		||||
    while (msg.message != WM_QUIT) {
 | 
			
		||||
      if (PeekMessage(&msg, NULL, 0U, 0U, PM_REMOVE)) {
 | 
			
		||||
        TranslateMessage(&msg);
 | 
			
		||||
        DispatchMessage(&msg);
 | 
			
		||||
      } else {
 | 
			
		||||
        Render();
 | 
			
		||||
 | 
			
		||||
        if (ref_file) {
 | 
			
		||||
          for (int count = 0; count < g_iFrameToCompare; count++) {
 | 
			
		||||
            Render();
 | 
			
		||||
          }
 | 
			
		||||
 | 
			
		||||
          const char *cur_image_path = "simpleD3D10Texture.ppm";
 | 
			
		||||
 | 
			
		||||
          // Save a reference of our current test run image
 | 
			
		||||
          CheckRenderD3D10::ActiveRenderTargetToPPM(g_pd3dDevice,
 | 
			
		||||
                                                    cur_image_path);
 | 
			
		||||
 | 
			
		||||
          // compare to offical reference image, printing PASS or FAIL.
 | 
			
		||||
          g_bPassed = CheckRenderD3D10::PPMvsPPM(cur_image_path, ref_file,
 | 
			
		||||
                                                 argv[0], MAX_EPSILON, 0.15f);
 | 
			
		||||
 | 
			
		||||
          g_bDone = true;
 | 
			
		||||
 | 
			
		||||
          Cleanup();
 | 
			
		||||
 | 
			
		||||
          PostQuitMessage(0);
 | 
			
		||||
        } else {
 | 
			
		||||
          g_bPassed = true;
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  // Release D3D Library (after message loop)
 | 
			
		||||
  dynlinkUnloadD3D10API();
 | 
			
		||||
 | 
			
		||||
  // Unregister windows class
 | 
			
		||||
  UnregisterClass(wc.lpszClassName, wc.hInstance);
 | 
			
		||||
 | 
			
		||||
  //
 | 
			
		||||
  // and exit
 | 
			
		||||
  //
 | 
			
		||||
  printf("> %s running on %s exiting...\n", SDK_name, device_name);
 | 
			
		||||
 | 
			
		||||
  exit(g_bPassed ? EXIT_SUCCESS : EXIT_FAILURE);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
// Name: InitD3D()
 | 
			
		||||
// Desc: Initializes Direct3D
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
HRESULT InitD3D(HWND hWnd) {
 | 
			
		||||
  // Set up the structure used to create the device and swapchain
 | 
			
		||||
  DXGI_SWAP_CHAIN_DESC sd;
 | 
			
		||||
  ZeroMemory(&sd, sizeof(sd));
 | 
			
		||||
  sd.BufferCount = 1;
 | 
			
		||||
  sd.BufferDesc.Width = g_WindowWidth;
 | 
			
		||||
  sd.BufferDesc.Height = g_WindowHeight;
 | 
			
		||||
  sd.BufferDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
 | 
			
		||||
  sd.BufferDesc.RefreshRate.Numerator = 60;
 | 
			
		||||
  sd.BufferDesc.RefreshRate.Denominator = 1;
 | 
			
		||||
  sd.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT;
 | 
			
		||||
  sd.OutputWindow = hWnd;
 | 
			
		||||
  sd.SampleDesc.Count = 1;
 | 
			
		||||
  sd.SampleDesc.Quality = 0;
 | 
			
		||||
  sd.Windowed = TRUE;
 | 
			
		||||
 | 
			
		||||
  // Create device and swapchain
 | 
			
		||||
  HRESULT hr = sFnPtr_D3D10CreateDeviceAndSwapChain(
 | 
			
		||||
      g_pCudaCapableAdapter, D3D10_DRIVER_TYPE_HARDWARE, NULL, 0,
 | 
			
		||||
      D3D10_SDK_VERSION, &sd, &g_pSwapChain, &g_pd3dDevice);
 | 
			
		||||
  AssertOrQuit(SUCCEEDED(hr));
 | 
			
		||||
  g_pCudaCapableAdapter->Release();
 | 
			
		||||
 | 
			
		||||
  // Create a render target view of the swapchain
 | 
			
		||||
  ID3D10Texture2D *pBuffer;
 | 
			
		||||
  hr =
 | 
			
		||||
      g_pSwapChain->GetBuffer(0, __uuidof(ID3D10Texture2D), (LPVOID *)&pBuffer);
 | 
			
		||||
  AssertOrQuit(SUCCEEDED(hr));
 | 
			
		||||
 | 
			
		||||
  hr = g_pd3dDevice->CreateRenderTargetView(pBuffer, NULL, &g_pSwapChainRTV);
 | 
			
		||||
  AssertOrQuit(SUCCEEDED(hr));
 | 
			
		||||
  pBuffer->Release();
 | 
			
		||||
 | 
			
		||||
  g_pd3dDevice->OMSetRenderTargets(1, &g_pSwapChainRTV, NULL);
 | 
			
		||||
 | 
			
		||||
  // Setup the viewport
 | 
			
		||||
  D3D10_VIEWPORT vp;
 | 
			
		||||
  vp.Width = g_WindowWidth;
 | 
			
		||||
  vp.Height = g_WindowHeight;
 | 
			
		||||
  vp.MinDepth = 0.0f;
 | 
			
		||||
  vp.MaxDepth = 1.0f;
 | 
			
		||||
  vp.TopLeftX = 0;
 | 
			
		||||
  vp.TopLeftY = 0;
 | 
			
		||||
  g_pd3dDevice->RSSetViewports(1, &vp);
 | 
			
		||||
 | 
			
		||||
  // Setup the effect
 | 
			
		||||
  {
 | 
			
		||||
    ID3D10Blob *pCompiledEffect;
 | 
			
		||||
    ID3D10Blob *pErrors = NULL;
 | 
			
		||||
    hr = sFnPtr_D3D10CompileEffectFromMemory((void *)g_simpleEffectSrc,
 | 
			
		||||
                                             sizeof(g_simpleEffectSrc), NULL,
 | 
			
		||||
                                             NULL,  // pDefines
 | 
			
		||||
                                             NULL,  // pIncludes
 | 
			
		||||
                                             0,     // HLSL flags
 | 
			
		||||
                                             0,     // FXFlags
 | 
			
		||||
                                             &pCompiledEffect, &pErrors);
 | 
			
		||||
 | 
			
		||||
    if (pErrors) {
 | 
			
		||||
      LPVOID l_pError = NULL;
 | 
			
		||||
      l_pError = pErrors->GetBufferPointer();  // then cast to a char* to see it
 | 
			
		||||
                                               // in the locals window
 | 
			
		||||
      fprintf(stdout, "Compilation error: \n %s", (char *)l_pError);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    AssertOrQuit(SUCCEEDED(hr));
 | 
			
		||||
 | 
			
		||||
    hr = sFnPtr_D3D10CreateEffectFromMemory(
 | 
			
		||||
        pCompiledEffect->GetBufferPointer(), pCompiledEffect->GetBufferSize(),
 | 
			
		||||
        0,  // FXFlags
 | 
			
		||||
        g_pd3dDevice, NULL, &g_pSimpleEffect);
 | 
			
		||||
    pCompiledEffect->Release();
 | 
			
		||||
 | 
			
		||||
    g_pSimpleTechnique = g_pSimpleEffect->GetTechniqueByName("Render");
 | 
			
		||||
 | 
			
		||||
    g_pvQuadRect =
 | 
			
		||||
        g_pSimpleEffect->GetVariableByName("g_vQuadRect")->AsVector();
 | 
			
		||||
    g_pUseCase = g_pSimpleEffect->GetVariableByName("g_UseCase")->AsScalar();
 | 
			
		||||
 | 
			
		||||
    g_pTexture2D =
 | 
			
		||||
        g_pSimpleEffect->GetVariableByName("g_Texture2D")->AsShaderResource();
 | 
			
		||||
    g_pTexture3D =
 | 
			
		||||
        g_pSimpleEffect->GetVariableByName("g_Texture3D")->AsShaderResource();
 | 
			
		||||
    g_pTextureCube =
 | 
			
		||||
        g_pSimpleEffect->GetVariableByName("g_TextureCube")->AsShaderResource();
 | 
			
		||||
 | 
			
		||||
    // Setup  no Input Layout
 | 
			
		||||
    g_pd3dDevice->IASetInputLayout(0);
 | 
			
		||||
    g_pd3dDevice->IASetPrimitiveTopology(
 | 
			
		||||
        D3D10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  D3D10_RASTERIZER_DESC rasterizerState;
 | 
			
		||||
  rasterizerState.FillMode = D3D10_FILL_SOLID;
 | 
			
		||||
  rasterizerState.CullMode = D3D10_CULL_FRONT;
 | 
			
		||||
  rasterizerState.FrontCounterClockwise = false;
 | 
			
		||||
  rasterizerState.DepthBias = false;
 | 
			
		||||
  rasterizerState.DepthBiasClamp = 0;
 | 
			
		||||
  rasterizerState.SlopeScaledDepthBias = 0;
 | 
			
		||||
  rasterizerState.DepthClipEnable = false;
 | 
			
		||||
  rasterizerState.ScissorEnable = false;
 | 
			
		||||
  rasterizerState.MultisampleEnable = false;
 | 
			
		||||
  rasterizerState.AntialiasedLineEnable = false;
 | 
			
		||||
  g_pd3dDevice->CreateRasterizerState(&rasterizerState, &g_pRasterState);
 | 
			
		||||
  g_pd3dDevice->RSSetState(g_pRasterState);
 | 
			
		||||
 | 
			
		||||
  return S_OK;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
// Name: InitTextures()
 | 
			
		||||
// Desc: Initializes Direct3D Textures (allocation and initialization)
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
// Creates the three D3D10 textures used by the sample (2D, volume, cube),
// a shader resource view for each, and binds every view to its effect
// variable. The chosen dimensions are recorded in g_texture_2d,
// g_texture_3d and g_texture_cube.
//
// Returns S_OK when everything was created, E_FAIL as soon as one creation
// call fails (objects created before that point are not released here).
HRESULT InitTextures() {
  // ---- 2D texture: 256 x 256, four 32-bit floats per texel ----
  {
    g_texture_2d.width = 256;
    g_texture_2d.height = 256;

    D3D10_TEXTURE2D_DESC tex2dDesc;
    ZeroMemory(&tex2dDesc, sizeof(tex2dDesc));
    tex2dDesc.Format = DXGI_FORMAT_R32G32B32A32_FLOAT;
    tex2dDesc.Width = g_texture_2d.width;
    tex2dDesc.Height = g_texture_2d.height;
    tex2dDesc.ArraySize = 1;
    tex2dDesc.MipLevels = 1;
    tex2dDesc.SampleDesc.Count = 1;
    tex2dDesc.BindFlags = D3D10_BIND_SHADER_RESOURCE;
    tex2dDesc.Usage = D3D10_USAGE_DEFAULT;

    HRESULT created = g_pd3dDevice->CreateTexture2D(&tex2dDesc, NULL,
                                                    &g_texture_2d.pTexture);
    if (FAILED(created)) {
      return E_FAIL;
    }

    created = g_pd3dDevice->CreateShaderResourceView(
        g_texture_2d.pTexture, NULL, &g_texture_2d.pSRView);
    if (FAILED(created)) {
      return E_FAIL;
    }

    g_pTexture2D->SetResource(g_texture_2d.pSRView);
  }

  // ---- volume texture: 64 x 64 x 64, 8-bit signed-normalized RGBA ----
  {
    g_texture_3d.width = 64;
    g_texture_3d.height = 64;
    g_texture_3d.depth = 64;

    D3D10_TEXTURE3D_DESC volumeDesc;
    ZeroMemory(&volumeDesc, sizeof(volumeDesc));
    volumeDesc.Format = DXGI_FORMAT_R8G8B8A8_SNORM;
    volumeDesc.Width = g_texture_3d.width;
    volumeDesc.Height = g_texture_3d.height;
    volumeDesc.Depth = g_texture_3d.depth;
    volumeDesc.MipLevels = 1;
    volumeDesc.BindFlags = D3D10_BIND_SHADER_RESOURCE;
    volumeDesc.Usage = D3D10_USAGE_DEFAULT;

    HRESULT created = g_pd3dDevice->CreateTexture3D(&volumeDesc, NULL,
                                                    &g_texture_3d.pTexture);
    if (FAILED(created)) {
      return E_FAIL;
    }

    created = g_pd3dDevice->CreateShaderResourceView(
        g_texture_3d.pTexture, NULL, &g_texture_3d.pSRView);
    if (FAILED(created)) {
      return E_FAIL;
    }

    g_pTexture3D->SetResource(g_texture_3d.pSRView);
  }

  // ---- cube texture: six 64 x 64 faces, 8-bit unsigned-normalized RGBA ----
  {
    g_texture_cube.size = 64;

    D3D10_TEXTURE2D_DESC cubeDesc;
    ZeroMemory(&cubeDesc, sizeof(cubeDesc));
    cubeDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
    cubeDesc.Width = g_texture_cube.size;
    cubeDesc.Height = g_texture_cube.size;
    cubeDesc.ArraySize = 6;  // one array slice per cube face
    cubeDesc.MipLevels = 1;
    cubeDesc.SampleDesc.Count = 1;
    cubeDesc.BindFlags = D3D10_BIND_SHADER_RESOURCE;
    cubeDesc.MiscFlags = D3D10_RESOURCE_MISC_TEXTURECUBE;
    cubeDesc.Usage = D3D10_USAGE_DEFAULT;

    HRESULT created = g_pd3dDevice->CreateTexture2D(&cubeDesc, NULL,
                                                    &g_texture_cube.pTexture);
    if (FAILED(created)) {
      return E_FAIL;
    }

    // The cube view is described explicitly instead of passing NULL.
    D3D10_SHADER_RESOURCE_VIEW_DESC cubeViewDesc;
    ZeroMemory(&cubeViewDesc, sizeof(cubeViewDesc));
    cubeViewDesc.ViewDimension = D3D10_SRV_DIMENSION_TEXTURECUBE;
    cubeViewDesc.Format = cubeDesc.Format;
    cubeViewDesc.TextureCube.MostDetailedMip = 0;
    cubeViewDesc.TextureCube.MipLevels = cubeDesc.MipLevels;

    created = g_pd3dDevice->CreateShaderResourceView(
        g_texture_cube.pTexture, &cubeViewDesc, &g_texture_cube.pSRView);
    if (FAILED(created)) {
      return E_FAIL;
    }

    g_pTextureCube->SetResource(g_texture_cube.pSRView);
  }

  return S_OK;
}
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
//! Run the Cuda part of the computation
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
void RunKernels() {
 | 
			
		||||
  static float t = 0.0f;
 | 
			
		||||
 | 
			
		||||
  // populate the 2d texture
 | 
			
		||||
  {
 | 
			
		||||
    cudaArray *cuArray;
 | 
			
		||||
    cudaGraphicsSubResourceGetMappedArray(&cuArray, g_texture_2d.cudaResource,
 | 
			
		||||
                                          0, 0);
 | 
			
		||||
    getLastCudaError(
 | 
			
		||||
        "cudaGraphicsSubResourceGetMappedArray (cuda_texture_2d) failed");
 | 
			
		||||
 | 
			
		||||
    // kick off the kernel and send the staging buffer cudaLinearMemory as an
 | 
			
		||||
    // argument to allow the kernel to write to it
 | 
			
		||||
    cuda_texture_2d(g_texture_2d.cudaLinearMemory, g_texture_2d.width,
 | 
			
		||||
                    g_texture_2d.height, g_texture_2d.pitch, t);
 | 
			
		||||
    getLastCudaError("cuda_texture_2d failed");
 | 
			
		||||
 | 
			
		||||
    // then we want to copy cudaLinearMemory to the D3D texture, via its mapped
 | 
			
		||||
    // form : cudaArray
 | 
			
		||||
    cudaMemcpy2DToArray(
 | 
			
		||||
        cuArray,                                            // dst array
 | 
			
		||||
        0, 0,                                               // offset
 | 
			
		||||
        g_texture_2d.cudaLinearMemory, g_texture_2d.pitch,  // src
 | 
			
		||||
        g_texture_2d.width * 4 * sizeof(float), g_texture_2d.height,  // extent
 | 
			
		||||
        cudaMemcpyDeviceToDevice);                                    // kind
 | 
			
		||||
    getLastCudaError("cudaMemcpy2DToArray failed");
 | 
			
		||||
  }
 | 
			
		||||
  // populate the volume texture
 | 
			
		||||
  {
 | 
			
		||||
    size_t pitchSlice = g_texture_3d.pitch * g_texture_3d.height;
 | 
			
		||||
    cudaArray *cuArray;
 | 
			
		||||
    cudaGraphicsSubResourceGetMappedArray(&cuArray, g_texture_3d.cudaResource,
 | 
			
		||||
                                          0, 0);
 | 
			
		||||
    getLastCudaError(
 | 
			
		||||
        "cudaGraphicsSubResourceGetMappedArray (cuda_texture_3d) failed");
 | 
			
		||||
 | 
			
		||||
    // kick off the kernel and send the staging buffer cudaLinearMemory as an
 | 
			
		||||
    // argument to allow the kernel to write to it
 | 
			
		||||
    cuda_texture_3d(g_texture_3d.cudaLinearMemory, g_texture_3d.width,
 | 
			
		||||
                    g_texture_3d.height, g_texture_3d.depth, g_texture_3d.pitch,
 | 
			
		||||
                    pitchSlice, t);
 | 
			
		||||
    getLastCudaError("cuda_texture_3d failed");
 | 
			
		||||
 | 
			
		||||
    // then we want to copy cudaLinearMemory to the D3D texture, via its mapped
 | 
			
		||||
    // form : cudaArray
 | 
			
		||||
    struct cudaMemcpy3DParms memcpyParams = {0};
 | 
			
		||||
    memcpyParams.dstArray = cuArray;
 | 
			
		||||
    memcpyParams.srcPtr.ptr = g_texture_3d.cudaLinearMemory;
 | 
			
		||||
    memcpyParams.srcPtr.pitch = g_texture_3d.pitch;
 | 
			
		||||
    memcpyParams.srcPtr.xsize = g_texture_3d.width;
 | 
			
		||||
    memcpyParams.srcPtr.ysize = g_texture_3d.height;
 | 
			
		||||
    memcpyParams.extent.width = g_texture_3d.width;
 | 
			
		||||
    memcpyParams.extent.height = g_texture_3d.height;
 | 
			
		||||
    memcpyParams.extent.depth = g_texture_3d.depth;
 | 
			
		||||
    memcpyParams.kind = cudaMemcpyDeviceToDevice;
 | 
			
		||||
    cudaMemcpy3D(&memcpyParams);
 | 
			
		||||
    getLastCudaError("cudaMemcpy3D failed");
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // populate the faces of the cube map
 | 
			
		||||
  for (int face = 0; face < 6; ++face) {
 | 
			
		||||
    cudaArray *cuArray;
 | 
			
		||||
    cudaGraphicsSubResourceGetMappedArray(&cuArray, g_texture_cube.cudaResource,
 | 
			
		||||
                                          face, 0);
 | 
			
		||||
    getLastCudaError(
 | 
			
		||||
        "cudaGraphicsSubResourceGetMappedArray (cuda_texture_cube) failed");
 | 
			
		||||
 | 
			
		||||
    // kick off the kernel and send the staging buffer cudaLinearMemory as an
 | 
			
		||||
    // argument to allow the kernel to write to it
 | 
			
		||||
    cuda_texture_cube(g_texture_cube.cudaLinearMemory, g_texture_cube.size,
 | 
			
		||||
                      g_texture_cube.size, g_texture_cube.pitch, face, t);
 | 
			
		||||
    getLastCudaError("cuda_texture_cube failed");
 | 
			
		||||
 | 
			
		||||
    // then we want to copy cudaLinearMemory to the D3D texture, via its mapped
 | 
			
		||||
    // form : cudaArray
 | 
			
		||||
    cudaMemcpy2DToArray(cuArray,  // dst array
 | 
			
		||||
                        0, 0,     // offset
 | 
			
		||||
                        g_texture_cube.cudaLinearMemory,
 | 
			
		||||
                        g_texture_cube.pitch,                          // src
 | 
			
		||||
                        g_texture_cube.size * 4, g_texture_cube.size,  // extent
 | 
			
		||||
                        cudaMemcpyDeviceToDevice);                     // kind
 | 
			
		||||
    getLastCudaError("cudaMemcpy2DToArray failed");
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  t += 0.01f;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
//! Draw the final result on the screen
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
void DrawScene() {
 | 
			
		||||
  // Clear the backbuffer to a black color
 | 
			
		||||
  float ClearColor[4] = {0.5f, 0.5f, 0.6f, 1.0f};
 | 
			
		||||
  g_pd3dDevice->ClearRenderTargetView(g_pSwapChainRTV, ClearColor);
 | 
			
		||||
 | 
			
		||||
  //
 | 
			
		||||
  // draw the 2d texture
 | 
			
		||||
  //
 | 
			
		||||
  g_pUseCase->SetInt(0);
 | 
			
		||||
  float quadRect[4] = {-0.9f, -0.9f, 0.7f, 0.7f};
 | 
			
		||||
  g_pvQuadRect->SetFloatVector((float *)&quadRect);
 | 
			
		||||
  g_pSimpleTechnique->GetPassByIndex(0)->Apply(0);
 | 
			
		||||
  g_pd3dDevice->Draw(4, 0);
 | 
			
		||||
 | 
			
		||||
  //
 | 
			
		||||
  // draw a slice the 3d texture
 | 
			
		||||
  //
 | 
			
		||||
  g_pUseCase->SetInt(1);
 | 
			
		||||
  quadRect[1] = 0.1f;
 | 
			
		||||
  g_pvQuadRect->SetFloatVector((float *)&quadRect);
 | 
			
		||||
  g_pSimpleTechnique->GetPassByIndex(0)->Apply(0);
 | 
			
		||||
  g_pd3dDevice->Draw(4, 0);
 | 
			
		||||
 | 
			
		||||
  //
 | 
			
		||||
  // draw the 6 faces of the cube texture
 | 
			
		||||
  //
 | 
			
		||||
  float faceRect[4] = {-0.1f, -0.9f, 0.5f, 0.5f};
 | 
			
		||||
 | 
			
		||||
  for (int f = 0; f < 6; f++) {
 | 
			
		||||
    if (f == 3) {
 | 
			
		||||
      faceRect[0] += 0.55f;
 | 
			
		||||
      faceRect[1] = -0.9f;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    g_pUseCase->SetInt(2 + f);
 | 
			
		||||
    g_pvQuadRect->SetFloatVector((float *)&faceRect);
 | 
			
		||||
    g_pSimpleTechnique->GetPassByIndex(0)->Apply(0);
 | 
			
		||||
    g_pd3dDevice->Draw(4, 0);
 | 
			
		||||
    faceRect[1] += 0.6f;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Present the backbuffer contents to the display
 | 
			
		||||
  g_pSwapChain->Present(0, 0);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
// Name: Cleanup()
 | 
			
		||||
// Desc: Releases all previously initialized objects
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
void Cleanup() {
 | 
			
		||||
  // unregister the Cuda resources
 | 
			
		||||
  cudaGraphicsUnregisterResource(g_texture_2d.cudaResource);
 | 
			
		||||
  getLastCudaError("cudaGraphicsUnregisterResource (g_texture_2d) failed");
 | 
			
		||||
  cudaFree(g_texture_2d.cudaLinearMemory);
 | 
			
		||||
  getLastCudaError("cudaFree (g_texture_2d) failed");
 | 
			
		||||
 | 
			
		||||
  cudaGraphicsUnregisterResource(g_texture_cube.cudaResource);
 | 
			
		||||
  getLastCudaError("cudaGraphicsUnregisterResource (g_texture_cube) failed");
 | 
			
		||||
  cudaFree(g_texture_cube.cudaLinearMemory);
 | 
			
		||||
  getLastCudaError("cudaFree (g_texture_2d) failed");
 | 
			
		||||
 | 
			
		||||
  cudaGraphicsUnregisterResource(g_texture_3d.cudaResource);
 | 
			
		||||
  getLastCudaError("cudaGraphicsUnregisterResource (g_texture_3d) failed");
 | 
			
		||||
  cudaFree(g_texture_3d.cudaLinearMemory);
 | 
			
		||||
  getLastCudaError("cudaFree (g_texture_2d) failed");
 | 
			
		||||
 | 
			
		||||
  //
 | 
			
		||||
  // clean up Direct3D
 | 
			
		||||
  //
 | 
			
		||||
  {
 | 
			
		||||
    // release the resources we created
 | 
			
		||||
    g_texture_2d.pSRView->Release();
 | 
			
		||||
    g_texture_2d.pTexture->Release();
 | 
			
		||||
    g_texture_cube.pSRView->Release();
 | 
			
		||||
    g_texture_cube.pTexture->Release();
 | 
			
		||||
    g_texture_3d.pSRView->Release();
 | 
			
		||||
    g_texture_3d.pTexture->Release();
 | 
			
		||||
 | 
			
		||||
    if (g_pInputLayout != NULL) g_pInputLayout->Release();
 | 
			
		||||
 | 
			
		||||
    if (g_pSimpleEffect != NULL) g_pSimpleEffect->Release();
 | 
			
		||||
 | 
			
		||||
    if (g_pSwapChainRTV != NULL) g_pSwapChainRTV->Release();
 | 
			
		||||
 | 
			
		||||
    if (g_pSwapChain != NULL) g_pSwapChain->Release();
 | 
			
		||||
 | 
			
		||||
    if (g_pd3dDevice != NULL) g_pd3dDevice->Release();
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
// Name: Render()
 | 
			
		||||
// Desc: Launches the CUDA kernels to fill in the texture data
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
void Render() {
 | 
			
		||||
  //
 | 
			
		||||
  // map the resources we've registered so we can access them in Cuda
 | 
			
		||||
  // - it is most efficient to map and unmap all resources in a single call,
 | 
			
		||||
  //   and to have the map/unmap calls be the boundary between using the GPU
 | 
			
		||||
  //   for Direct3D and Cuda
 | 
			
		||||
  //
 | 
			
		||||
  static bool doit = true;
 | 
			
		||||
 | 
			
		||||
  if (doit) {
 | 
			
		||||
    doit = true;
 | 
			
		||||
    cudaStream_t stream = 0;
 | 
			
		||||
    const int nbResources = 3;
 | 
			
		||||
    cudaGraphicsResource *ppResources[nbResources] = {
 | 
			
		||||
        g_texture_2d.cudaResource, g_texture_3d.cudaResource,
 | 
			
		||||
        g_texture_cube.cudaResource,
 | 
			
		||||
    };
 | 
			
		||||
    cudaGraphicsMapResources(nbResources, ppResources, stream);
 | 
			
		||||
    getLastCudaError("cudaGraphicsMapResources(3) failed");
 | 
			
		||||
 | 
			
		||||
    //
 | 
			
		||||
    // run kernels which will populate the contents of those textures
 | 
			
		||||
    //
 | 
			
		||||
    RunKernels();
 | 
			
		||||
 | 
			
		||||
    //
 | 
			
		||||
    // unmap the resources
 | 
			
		||||
    //
 | 
			
		||||
    cudaGraphicsUnmapResources(nbResources, ppResources, stream);
 | 
			
		||||
    getLastCudaError("cudaGraphicsUnmapResources(3) failed");
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  //
 | 
			
		||||
  // draw the scene using them
 | 
			
		||||
  //
 | 
			
		||||
  DrawScene();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
// Name: MsgProc()
 | 
			
		||||
// Desc: The window's message handler
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
static LRESULT WINAPI MsgProc(HWND hWnd, UINT msg, WPARAM wParam,
                              LPARAM lParam) {
  // Pressing ESC and destroying the window both end the sample: flag the
  // main loop, release all resources and post the quit message.
  const bool escapePressed = (msg == WM_KEYDOWN) && (wParam == VK_ESCAPE);

  if (escapePressed || msg == WM_DESTROY) {
    g_bDone = true;
    Cleanup();
    PostQuitMessage(0);
    return 0;
  }

  // Painting is not done in response to WM_PAINT; just validate the client
  // area and report the message as handled.
  if (msg == WM_PAINT) {
    ValidateRect(hWnd, NULL);
    return 0;
  }

  // Everything else (including other key presses) gets default handling.
  return DefWindowProc(hWnd, msg, wParam, lParam);
}
 | 
			
		||||
@ -1,78 +0,0 @@
 | 
			
		||||
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
 | 
			
		||||
 *
 | 
			
		||||
 * Redistribution and use in source and binary forms, with or without
 | 
			
		||||
 * modification, are permitted provided that the following conditions
 | 
			
		||||
 * are met:
 | 
			
		||||
 *  * Redistributions of source code must retain the above copyright
 | 
			
		||||
 *    notice, this list of conditions and the following disclaimer.
 | 
			
		||||
 *  * Redistributions in binary form must reproduce the above copyright
 | 
			
		||||
 *    notice, this list of conditions and the following disclaimer in the
 | 
			
		||||
 *    documentation and/or other materials provided with the distribution.
 | 
			
		||||
 *  * Neither the name of NVIDIA CORPORATION nor the names of its
 | 
			
		||||
 *    contributors may be used to endorse or promote products derived
 | 
			
		||||
 *    from this software without specific prior written permission.
 | 
			
		||||
 *
 | 
			
		||||
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 | 
			
		||||
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 | 
			
		||||
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 | 
			
		||||
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 | 
			
		||||
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 | 
			
		||||
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 | 
			
		||||
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 | 
			
		||||
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 | 
			
		||||
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 | 
			
		||||
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 | 
			
		||||
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
#include <stdio.h>
 | 
			
		||||
#include <stdlib.h>
 | 
			
		||||
#include <string.h>
 | 
			
		||||
 | 
			
		||||
#define PI 3.1415926536f
 | 
			
		||||
 | 
			
		||||
/*
 * Paint a 2D texture with a moving red/green hatch pattern on a
 * strobing blue background.  Note that this kernel reads from and
 * writes to the texture (red and green are blended 50/50 with their
 * previous contents), hence why this texture was not mapped as
 * WriteDiscard.
 *
 *   surface  base address of the pixel buffer; each pixel is four floats
 *            written here in the order red, green, blue, alpha
 *   width    number of pixels per row
 *   height   number of rows
 *   pitch    distance between consecutive rows, in bytes
 *   t        animation phase
 *
 * Launch layout: 2D grid of 2D blocks, one thread per pixel.  Threads that
 * fall outside width x height return without touching memory.
 */
__global__ void cuda_kernel_texture_2d(unsigned char *surface, int width,
                                       int height, size_t pitch, float t) {
  const int x = blockIdx.x * blockDim.x + threadIdx.x;
  const int y = blockIdx.y * blockDim.y + threadIdx.y;

  // in the case where, due to quantization into grids, we have
  // more threads than pixels, skip the threads which don't
  // correspond to valid pixels
  if (x >= width || y >= height) return;

  // get a pointer to the pixel at (x,y): the row offset is applied in bytes,
  // the column offset in floats (4 per pixel)
  float *pixel = (float *)(surface + y * pitch) + 4 * x;

  // populate it; all arithmetic stays in single precision (float literals
  // and the explicit float math functions) so nothing is promoted to double
  const float value_x =
      0.5f + 0.5f * cosf(t + 10.0f * ((2.0f * x) / width - 1.0f));
  const float value_y =
      0.5f + 0.5f * cosf(t + 10.0f * ((2.0f * y) / height - 1.0f));

  pixel[0] = 0.5f * pixel[0] + 0.5f * powf(value_x, 3.0f);  // red
  pixel[1] = 0.5f * pixel[1] + 0.5f * powf(value_y, 3.0f);  // green
  pixel[2] = 0.5f + 0.5f * cosf(t);                         // blue
  pixel[3] = 1.0f;                                          // alpha
}
 | 
			
		||||
 | 
			
		||||
extern "C" void cuda_texture_2d(void *surface, int width, int height,
                                size_t pitch, float t) {
  // Blocks are fixed at 16x16 = 256 threads; the grid is rounded up so that
  // every pixel of the width x height surface is covered by a thread.
  const dim3 threadsPerBlock(16, 16);
  const dim3 blocksPerGrid(
      (width + threadsPerBlock.x - 1) / threadsPerBlock.x,
      (height + threadsPerBlock.y - 1) / threadsPerBlock.y);

  cuda_kernel_texture_2d<<<blocksPerGrid, threadsPerBlock>>>(
      (unsigned char *)surface, width, height, pitch, t);

  // A launch does not return a status itself; query it and report failures.
  const cudaError_t launchStatus = cudaGetLastError();

  if (launchStatus == cudaSuccess) {
    return;
  }

  printf("cuda_kernel_texture_2d() failed to launch error = %d\n",
         launchStatus);
}
 | 
			
		||||
@ -1,76 +0,0 @@
 | 
			
		||||
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
 | 
			
		||||
 *
 | 
			
		||||
 * Redistribution and use in source and binary forms, with or without
 | 
			
		||||
 * modification, are permitted provided that the following conditions
 | 
			
		||||
 * are met:
 | 
			
		||||
 *  * Redistributions of source code must retain the above copyright
 | 
			
		||||
 *    notice, this list of conditions and the following disclaimer.
 | 
			
		||||
 *  * Redistributions in binary form must reproduce the above copyright
 | 
			
		||||
 *    notice, this list of conditions and the following disclaimer in the
 | 
			
		||||
 *    documentation and/or other materials provided with the distribution.
 | 
			
		||||
 *  * Neither the name of NVIDIA CORPORATION nor the names of its
 | 
			
		||||
 *    contributors may be used to endorse or promote products derived
 | 
			
		||||
 *    from this software without specific prior written permission.
 | 
			
		||||
 *
 | 
			
		||||
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 | 
			
		||||
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 | 
			
		||||
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 | 
			
		||||
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 | 
			
		||||
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 | 
			
		||||
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 | 
			
		||||
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 | 
			
		||||
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 | 
			
		||||
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 | 
			
		||||
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 | 
			
		||||
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
#include <stdio.h>
 | 
			
		||||
#include <stdlib.h>
 | 
			
		||||
#include <string.h>
 | 
			
		||||
 | 
			
		||||
/*
 * Paint a 3D texture with an animated pattern.  For every texel the phase
 *     t + (x*x + y*y + z*z) * 0.0001 * 3.14
 * drives the red channel through a cosine and the green channel through a
 * sine (both remapped to 0..255); blue is written as 0 and alpha as 255.
 *
 * Each thread owns one (x, y) column and walks it through all `depth`
 * slices.  `pitch` is the byte distance between rows and `pitchSlice` the
 * byte distance between slices; each texel occupies 4 bytes.
 */
__global__ void cuda_kernel_texture_3d(unsigned char *surface, int width,
                                       int height, int depth, size_t pitch,
                                       size_t pitchSlice, float t) {
  const int x = blockIdx.x * blockDim.x + threadIdx.x;
  const int y = blockIdx.y * blockDim.y + threadIdx.y;

  // the grid is rounded up to whole blocks, so some threads have no texel
  // column to work on; they simply leave
  if (x >= width || y >= height) return;

  // address of this thread's texel in slice 0, and the part of the squared
  // distance that does not depend on z
  unsigned char *const column = surface + y * pitch + 4 * x;
  const int xySquared = x * x + y * y;

  // walk across the Z slices of this texture.  it should be noted that
  // this is far from optimal data access.
  for (int z = 0; z < depth; ++z) {
    unsigned char *texel = column + z * pitchSlice;
    const float phase = t + (xySquared + z * z) * 0.0001f * 3.14f;

    texel[0] = (unsigned char)(255.f * (0.5f + 0.5f * cosf(phase)));  // red
    texel[1] = (unsigned char)(255.f * (0.5f + 0.5f * sinf(phase)));  // green
    texel[2] = (unsigned char)0;                                      // blue
    texel[3] = 255;                                                   // alpha
  }
}
 | 
			
		||||
 | 
			
		||||
extern "C" void cuda_texture_3d(void *surface, int width, int height, int depth,
                                size_t pitch, size_t pitchSlice, float t) {
  // Blocks are fixed at 16x16 = 256 threads.  The grid only spans width x
  // height (rounded up); the kernel itself iterates over the depth.
  const dim3 threadsPerBlock(16, 16);
  const dim3 blocksPerGrid(
      (width + threadsPerBlock.x - 1) / threadsPerBlock.x,
      (height + threadsPerBlock.y - 1) / threadsPerBlock.y);

  cuda_kernel_texture_3d<<<blocksPerGrid, threadsPerBlock>>>(
      (unsigned char *)surface, width, height, depth, pitch, pitchSlice, t);

  // A launch does not return a status itself; query it and report failures.
  const cudaError_t launchStatus = cudaGetLastError();

  if (launchStatus == cudaSuccess) {
    return;
  }

  printf("cuda_kernel_texture_3d() failed to launch error = %d\n",
         launchStatus);
}
 | 
			
		||||
@ -1,91 +0,0 @@
 | 
			
		||||
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
 | 
			
		||||
 *
 | 
			
		||||
 * Redistribution and use in source and binary forms, with or without
 | 
			
		||||
 * modification, are permitted provided that the following conditions
 | 
			
		||||
 * are met:
 | 
			
		||||
 *  * Redistributions of source code must retain the above copyright
 | 
			
		||||
 *    notice, this list of conditions and the following disclaimer.
 | 
			
		||||
 *  * Redistributions in binary form must reproduce the above copyright
 | 
			
		||||
 *    notice, this list of conditions and the following disclaimer in the
 | 
			
		||||
 *    documentation and/or other materials provided with the distribution.
 | 
			
		||||
 *  * Neither the name of NVIDIA CORPORATION nor the names of its
 | 
			
		||||
 *    contributors may be used to endorse or promote products derived
 | 
			
		||||
 *    from this software without specific prior written permission.
 | 
			
		||||
 *
 | 
			
		||||
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 | 
			
		||||
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 | 
			
		||||
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 | 
			
		||||
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 | 
			
		||||
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 | 
			
		||||
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 | 
			
		||||
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 | 
			
		||||
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 | 
			
		||||
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 | 
			
		||||
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 | 
			
		||||
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
#include <stdio.h>
 | 
			
		||||
#include <stdlib.h>
 | 
			
		||||
#include <string.h>
 | 
			
		||||
 | 
			
		||||
#define PI 3.1415926536f
 | 
			
		||||
 | 
			
		||||
/*
 * Paint a 2D surface with a moving bulls-eye pattern.  The "face" parameter
 * selects between 6 different colorings, so that each face of a cube map
 * gets its own color.
 *
 *   surface  base address of the pixel buffer, 4 bytes per pixel
 *   width    number of pixels per row
 *   height   number of rows
 *   pitch    distance between consecutive rows, in bytes
 *   face     face index; face / 2 picks the channel that is treated
 *            specially and face % 2 picks which of the two schemes is used
 *   t        animation phase
 *
 * Launch layout: 2D grid of 2D blocks, one thread per pixel.  Threads that
 * fall outside width x height return without touching memory.
 */
__global__ void cuda_kernel_texture_cube(char *surface, int width, int height,
                                         size_t pitch, int face, float t) {
  const int x = blockIdx.x * blockDim.x + threadIdx.x;
  const int y = blockIdx.y * blockDim.y + threadIdx.y;

  // in the case where, due to quantization into grids, we have
  // more threads than pixels, skip the threads which don't
  // correspond to valid pixels
  if (x >= width || y >= height) return;

  // get a pointer to this pixel
  unsigned char *pixel = (unsigned char *)(surface + y * pitch) + 4 * x;

  // bulls-eye intensity: a cosine of the distance from the surface centre
  // (coordinates normalised to [-1, 1)), shifted over time by t
  const float theta_x = (2.0f * x) / width - 1.0f;
  const float theta_y = (2.0f * y) / height - 1.0f;
  const float theta = 2.0f * PI * sqrtf(theta_x * theta_x + theta_y * theta_y);
  const unsigned char value =
      (unsigned char)(255 * (0.6f + 0.4f * cosf(theta + t)));

  // The original code assigned the literal 0.5 to these unsigned char
  // channels, which is truncated to 0 by the conversion.  The value is
  // spelled out here so the stored result is obvious and no implicit
  // double -> unsigned char narrowing remains.
  // NOTE(review): 0.5 looks like a leftover from a float-texture variant;
  // confirm whether a mid-grey was intended before changing the output.
  const unsigned char kOff = 0;

  // odd faces: everything off except the selected channel;
  // even faces: everything at `value` except the selected channel
  const bool oddFace = (face % 2) != 0;
  const unsigned char base = oddFace ? kOff : value;
  const unsigned char selected = oddFace ? value : kOff;

  pixel[3] = 255;  // alpha

  // channels 0, 1, 2 are labelled blue, green, red in the original sample
  pixel[0] = base;
  pixel[1] = base;
  pixel[2] = base;

  // written last, exactly as in the original, so it overrides the base value
  pixel[face / 2] = selected;
}
 | 
			
		||||
 | 
			
		||||
extern "C" void cuda_texture_cube(void *surface, int width, int height,
                                  size_t pitch, int face, float t) {
  // Blocks are fixed at 16x16 = 256 threads; the grid is rounded up so that
  // every pixel of the width x height face is covered by a thread.
  const dim3 threadsPerBlock(16, 16);
  const dim3 blocksPerGrid(
      (width + threadsPerBlock.x - 1) / threadsPerBlock.x,
      (height + threadsPerBlock.y - 1) / threadsPerBlock.y);

  cuda_kernel_texture_cube<<<blocksPerGrid, threadsPerBlock>>>(
      (char *)surface, width, height, pitch, face, t);

  // A launch does not return a status itself; query it and report failures.
  const cudaError_t launchStatus = cudaGetLastError();

  if (launchStatus == cudaSuccess) {
    return;
  }

  printf("cuda_kernel_texture_cube() failed to launch error = %d\n",
         launchStatus);
}
 | 
			
		||||
@ -1,46 +0,0 @@
 | 
			
		||||
################################################################################
 | 
			
		||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
 | 
			
		||||
#
 | 
			
		||||
# Redistribution and use in source and binary forms, with or without
 | 
			
		||||
# modification, are permitted provided that the following conditions
 | 
			
		||||
# are met:
 | 
			
		||||
#  * Redistributions of source code must retain the above copyright
 | 
			
		||||
#    notice, this list of conditions and the following disclaimer.
 | 
			
		||||
#  * Redistributions in binary form must reproduce the above copyright
 | 
			
		||||
#    notice, this list of conditions and the following disclaimer in the
 | 
			
		||||
#    documentation and/or other materials provided with the distribution.
 | 
			
		||||
#  * Neither the name of NVIDIA CORPORATION nor the names of its
 | 
			
		||||
#    contributors may be used to endorse or promote products derived
 | 
			
		||||
#    from this software without specific prior written permission.
 | 
			
		||||
#
 | 
			
		||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 | 
			
		||||
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 | 
			
		||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 | 
			
		||||
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 | 
			
		||||
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 | 
			
		||||
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 | 
			
		||||
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 | 
			
		||||
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 | 
			
		||||
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 | 
			
		||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 | 
			
		||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
			
		||||
#
 | 
			
		||||
################################################################################
 | 
			
		||||
#
 | 
			
		||||
# Makefile project only supported on Mac OS X and Linux platforms
 | 
			
		||||
#
 | 
			
		||||
################################################################################
 | 
			
		||||
 | 
			
		||||
# Target rules
 | 
			
		||||
all: build
 | 
			
		||||
 | 
			
		||||
build:
 | 
			
		||||
	$(info >>> WARNING - simpleD3D9 is not supported on Linux - waiving sample <<<)
 | 
			
		||||
 | 
			
		||||
run: build
 | 
			
		||||
 | 
			
		||||
testrun: build
 | 
			
		||||
 | 
			
		||||
clean:
 | 
			
		||||
 | 
			
		||||
clobber: clean
 | 
			
		||||
@ -1,49 +0,0 @@
 | 
			
		||||
# simpleD3D9 - Simple Direct3D9 (Vertex Arrays)
 | 
			
		||||
 | 
			
		||||
## Description
 | 
			
		||||
 | 
			
		||||
Simple program which demonstrates interoperability between CUDA and Direct3D9. The program generates a vertex array with CUDA and uses Direct3D9 to render the geometry.  A Direct3D capable device is required.
 | 
			
		||||
 | 
			
		||||
## Key Concepts
 | 
			
		||||
 | 
			
		||||
Graphics Interop
 | 
			
		||||
 | 
			
		||||
## Supported SM Architectures
 | 
			
		||||
 | 
			
		||||
[SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.9 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 | 
			
		||||
 | 
			
		||||
## Supported OSes
 | 
			
		||||
 | 
			
		||||
Windows
 | 
			
		||||
 | 
			
		||||
## Supported CPU Architecture
 | 
			
		||||
 | 
			
		||||
x86_64
 | 
			
		||||
 | 
			
		||||
## CUDA APIs involved
 | 
			
		||||
 | 
			
		||||
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
 | 
			
		||||
cudaGraphicsUnmapResources, cudaGraphicsResourceGetMappedPointer, cudaGetLastError, cudaGraphicsMapResources, cudaGraphicsUnregisterResource
 | 
			
		||||
 | 
			
		||||
## Dependencies needed to build/run
 | 
			
		||||
[DirectX](../../../README.md#directx)
 | 
			
		||||
 | 
			
		||||
## Prerequisites
 | 
			
		||||
 | 
			
		||||
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 | 
			
		||||
Make sure the dependencies mentioned in the [Dependencies](#dependencies-needed-to-buildrun) section above are installed.
 | 
			
		||||
 | 
			
		||||
## Build and Run
 | 
			
		||||
 | 
			
		||||
### Windows
 | 
			
		||||
The Windows samples are built using the Visual Studio IDE. Solution files (.sln) are provided for each supported version of Visual Studio, using the format:
 | 
			
		||||
```
 | 
			
		||||
*_vs<version>.sln - for Visual Studio <version>
 | 
			
		||||
```
 | 
			
		||||
Each individual sample has its own set of solution files in its directory.
 | 
			
		||||
 | 
			
		||||
To build/examine all the samples at once, the complete solution files should be used. To build/examine a single sample, the individual sample solution files should be used.
 | 
			
		||||
> **Note:** Some samples require that the Microsoft DirectX SDK (June 2010 or newer) be installed and that the VC++ directory paths are properly set up (**Tools > Options...**). Check the DirectX Dependencies section for details.
 | 
			
		||||
 | 
			
		||||
## References (for more details)
 | 
			
		||||
 | 
			
		||||
										
											Binary file not shown.
										
									
								
							
										
											Binary file not shown.
										
									
								
							| 
		 Before Width: | Height: | Size: 269 KiB  | 
										
											Binary file not shown.
										
									
								
							| 
		 Before Width: | Height: | Size: 65 KiB  | 
										
											Binary file not shown.
										
									
								
							| 
		 Before Width: | Height: | Size: 21 KiB  | 
@ -1,668 +0,0 @@
 | 
			
		||||
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
 | 
			
		||||
 *
 | 
			
		||||
 * Redistribution and use in source and binary forms, with or without
 | 
			
		||||
 * modification, are permitted provided that the following conditions
 | 
			
		||||
 * are met:
 | 
			
		||||
 *  * Redistributions of source code must retain the above copyright
 | 
			
		||||
 *    notice, this list of conditions and the following disclaimer.
 | 
			
		||||
 *  * Redistributions in binary form must reproduce the above copyright
 | 
			
		||||
 *    notice, this list of conditions and the following disclaimer in the
 | 
			
		||||
 *    documentation and/or other materials provided with the distribution.
 | 
			
		||||
 *  * Neither the name of NVIDIA CORPORATION nor the names of its
 | 
			
		||||
 *    contributors may be used to endorse or promote products derived
 | 
			
		||||
 *    from this software without specific prior written permission.
 | 
			
		||||
 *
 | 
			
		||||
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 | 
			
		||||
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 | 
			
		||||
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 | 
			
		||||
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 | 
			
		||||
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 | 
			
		||||
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 | 
			
		||||
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 | 
			
		||||
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 | 
			
		||||
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 | 
			
		||||
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 | 
			
		||||
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
// This example demonstrates how to use the CUDA Direct3D bindings to fill
 | 
			
		||||
// a vertex buffer with CUDA and use Direct3D to render the data.
 | 
			
		||||
// Host code.
 | 
			
		||||
 | 
			
		||||
#pragma warning(disable : 4312)
 | 
			
		||||
 | 
			
		||||
#include <Windows.h>
 | 
			
		||||
#include <mmsystem.h>
 | 
			
		||||
#pragma warning(disable : 4996)  // disable deprecated warning
 | 
			
		||||
#include <strsafe.h>
 | 
			
		||||
#pragma warning(default : 4996)
 | 
			
		||||
#include <cassert>
 | 
			
		||||
 | 
			
		||||
// includes, cuda
 | 
			
		||||
#include <cuda_runtime_api.h>
 | 
			
		||||
#include <cuda_d3d9_interop.h>
 | 
			
		||||
 | 
			
		||||
// includes, project
 | 
			
		||||
#include <rendercheck_d3d9.h>
 | 
			
		||||
#include <helper_functions.h>  // Helper functions for other non-cuda utilities
 | 
			
		||||
#include <helper_cuda.h>       // CUDA Helper Functions for initialization
 | 
			
		||||
#include <DirectXMath.h>
 | 
			
		||||
using namespace DirectX;
 | 
			
		||||
 | 
			
		||||
#define MAX_EPSILON 10
 | 
			
		||||
 | 
			
		||||
static char *sSDKsample = "simpleD3D9";
 | 
			
		||||
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
// Global variables
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
IDirect3D9Ex *g_pD3D = NULL;              // Used to create the D3DDevice
 | 
			
		||||
unsigned int g_iAdapter = NULL;           // Our adapter
 | 
			
		||||
IDirect3DDevice9Ex *g_pD3DDevice = NULL;  // Our rendering device
 | 
			
		||||
IDirect3DVertexBuffer9 *g_pVB = NULL;     // Buffer to hold vertices
 | 
			
		||||
 | 
			
		||||
struct cudaGraphicsResource *cuda_VB_resource;  // handles D3D9-CUDA exchange
 | 
			
		||||
 | 
			
		||||
D3DDISPLAYMODEEX g_d3ddm;
 | 
			
		||||
D3DPRESENT_PARAMETERS g_d3dpp;
 | 
			
		||||
 | 
			
		||||
bool g_bWindowed = true;
 | 
			
		||||
bool g_bDeviceLost = false;
 | 
			
		||||
bool g_bPassed = true;
 | 
			
		||||
 | 
			
		||||
// A structure for our custom vertex type
 | 
			
		||||
struct CUSTOMVERTEX {
 | 
			
		||||
  FLOAT x, y, z;  // The untransformed, 3D position for the vertex
 | 
			
		||||
  DWORD color;    // The vertex color
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
// Our custom FVF, which describes our custom vertex structure
 | 
			
		||||
#define D3DFVF_CUSTOMVERTEX (D3DFVF_XYZ | D3DFVF_DIFFUSE)
 | 
			
		||||
 | 
			
		||||
const unsigned int g_WindowWidth = 512;
 | 
			
		||||
const unsigned int g_WindowHeight = 512;
 | 
			
		||||
 | 
			
		||||
const unsigned int g_MeshWidth = 256;
 | 
			
		||||
const unsigned int g_MeshHeight = 256;
 | 
			
		||||
 | 
			
		||||
const unsigned int g_NumVertices = g_MeshWidth * g_MeshHeight;
 | 
			
		||||
 | 
			
		||||
bool g_bQAReadback = false;
 | 
			
		||||
int g_iFrameToCompare = 10;
 | 
			
		||||
 | 
			
		||||
int *pArgc = NULL;
 | 
			
		||||
char **pArgv = NULL;
 | 
			
		||||
 | 
			
		||||
float anim;
 | 
			
		||||
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
// Forward declarations
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
void runTest(int argc, char **argv, char *ref_file);
 | 
			
		||||
void runCuda();
 | 
			
		||||
bool SaveVBResult(int argc, char **argv);
 | 
			
		||||
HRESULT InitD3D9(HWND hWnd);
 | 
			
		||||
HRESULT InitD3D9RenderState();
 | 
			
		||||
HRESULT InitCUDA();
 | 
			
		||||
HRESULT RestoreContextResources();
 | 
			
		||||
HRESULT InitVertexBuffer();
 | 
			
		||||
HRESULT FreeVertexBuffer();
 | 
			
		||||
VOID Cleanup();
 | 
			
		||||
VOID SetupMatrices();
 | 
			
		||||
HRESULT Render();
 | 
			
		||||
LRESULT WINAPI MsgProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam);
 | 
			
		||||
 | 
			
		||||
// CUDA D3D9 kernel
 | 
			
		||||
extern "C" void simpleD3DKernel(float4 *pos, unsigned int width,
 | 
			
		||||
                                unsigned int height, float time);
 | 
			
		||||
 | 
			
		||||
#define NAME_LEN 512
 | 
			
		||||
 | 
			
		||||
char device_name[NAME_LEN];
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Program main
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
int main(int argc, char **argv) {
 | 
			
		||||
  char *ref_file = NULL;
 | 
			
		||||
 | 
			
		||||
  pArgc = &argc;
 | 
			
		||||
  pArgv = argv;
 | 
			
		||||
 | 
			
		||||
  printf("> %s starting...\n", sSDKsample);
 | 
			
		||||
 | 
			
		||||
  // command line options
 | 
			
		||||
  if (argc > 1) {
 | 
			
		||||
    if (checkCmdLineFlag(argc, (const char **)argv, "file")) {
 | 
			
		||||
      getCmdLineArgumentString(argc, (const char **)argv, "file",
 | 
			
		||||
                               (char **)&ref_file);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  runTest(argc, argv, ref_file);
 | 
			
		||||
 | 
			
		||||
  //
 | 
			
		||||
  // and exit
 | 
			
		||||
  //
 | 
			
		||||
  printf("%s running on %s exiting...\n", sSDKsample, device_name);
 | 
			
		||||
  printf("%s sample finished returned: %s\n", sSDKsample,
 | 
			
		||||
         (g_bPassed ? "OK" : "ERROR!"));
 | 
			
		||||
  exit(g_bPassed ? EXIT_SUCCESS : EXIT_FAILURE);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
//! Run a simple test for CUDA
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
void runTest(int argc, char **argv, char *ref_file) {
 | 
			
		||||
  // Register the window class
 | 
			
		||||
  WNDCLASSEX wc = {sizeof(WNDCLASSEX),     CS_CLASSDC, MsgProc, 0L,   0L,
 | 
			
		||||
                   GetModuleHandle(NULL),  NULL,       NULL,    NULL, NULL,
 | 
			
		||||
                   "CUDA/D3D9 simpleD3D9", NULL};
 | 
			
		||||
  RegisterClassEx(&wc);
 | 
			
		||||
 | 
			
		||||
  // Create the application's window
 | 
			
		||||
  int xBorder = ::GetSystemMetrics(SM_CXSIZEFRAME);
 | 
			
		||||
  int yBorder = ::GetSystemMetrics(SM_CYSIZEFRAME);
 | 
			
		||||
  int yMenu = ::GetSystemMetrics(SM_CYMENU);
 | 
			
		||||
  HWND hWnd = CreateWindow(
 | 
			
		||||
      wc.lpszClassName, "CUDA/D3D9 simpleD3D9", WS_OVERLAPPEDWINDOW, 0, 0,
 | 
			
		||||
      g_WindowWidth + 2 * xBorder, g_WindowHeight + 2 * yBorder + yMenu, NULL,
 | 
			
		||||
      NULL, wc.hInstance, NULL);
 | 
			
		||||
 | 
			
		||||
  // Initialize Direct3D9
 | 
			
		||||
  if (SUCCEEDED(InitD3D9(hWnd)) && SUCCEEDED(InitCUDA())) {
 | 
			
		||||
    // Create the scene geometry
 | 
			
		||||
    if (SUCCEEDED(InitVertexBuffer())) {
 | 
			
		||||
      // This is the normal case (D3D9 device is present)
 | 
			
		||||
      if (!g_bDeviceLost) {
 | 
			
		||||
        // Initialize D3D9 vertex buffer contents using CUDA kernel
 | 
			
		||||
        runCuda();
 | 
			
		||||
 | 
			
		||||
        // Save result
 | 
			
		||||
        SaveVBResult(argc, argv);
 | 
			
		||||
 | 
			
		||||
        // Show the window
 | 
			
		||||
        ShowWindow(hWnd, SW_SHOWDEFAULT);
 | 
			
		||||
        UpdateWindow(hWnd);
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      // Enter the message loop
 | 
			
		||||
      MSG msg;
 | 
			
		||||
      ZeroMemory(&msg, sizeof(msg));
 | 
			
		||||
 | 
			
		||||
      while (msg.message != WM_QUIT) {
 | 
			
		||||
        if (PeekMessage(&msg, NULL, 0U, 0U, PM_REMOVE)) {
 | 
			
		||||
          TranslateMessage(&msg);
 | 
			
		||||
          DispatchMessage(&msg);
 | 
			
		||||
        } else {
 | 
			
		||||
          Render();
 | 
			
		||||
 | 
			
		||||
          if (ref_file != NULL) {
 | 
			
		||||
            for (int count = 0; count < g_iFrameToCompare; count++) {
 | 
			
		||||
              Render();
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            const char *cur_image_path = "simpleD3D9.ppm";
 | 
			
		||||
 | 
			
		||||
            // Save a reference of our current test run image
 | 
			
		||||
            CheckRenderD3D9::BackbufferToPPM(g_pD3DDevice, cur_image_path);
 | 
			
		||||
 | 
			
		||||
            // compare to offical reference image, printing PASS or FAIL.
 | 
			
		||||
            g_bPassed = CheckRenderD3D9::PPMvsPPM(cur_image_path, ref_file,
 | 
			
		||||
                                                  argv[0], MAX_EPSILON, 0.15f);
 | 
			
		||||
 | 
			
		||||
            Cleanup();
 | 
			
		||||
 | 
			
		||||
            PostQuitMessage(0);
 | 
			
		||||
          }
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  UnregisterClass(wc.lpszClassName, wc.hInstance);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
//! Run the Cuda part of the computation
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
void runCuda() {
 | 
			
		||||
  HRESULT hr = S_OK;
 | 
			
		||||
 | 
			
		||||
  // Map vertex buffer to Cuda
 | 
			
		||||
  float4 *d_ptr;
 | 
			
		||||
 | 
			
		||||
  // CUDA Map call to the Vertex Buffer and return a pointer
 | 
			
		||||
  checkCudaErrors(cudaGraphicsMapResources(1, &cuda_VB_resource, 0));
 | 
			
		||||
  getLastCudaError("cudaGraphicsMapResources failed");
 | 
			
		||||
  // This gets a pointer from the Vertex Buffer
 | 
			
		||||
  size_t num_bytes;
 | 
			
		||||
  checkCudaErrors(cudaGraphicsResourceGetMappedPointer(
 | 
			
		||||
      (void **)&d_ptr, &num_bytes, cuda_VB_resource));
 | 
			
		||||
  getLastCudaError("cudaGraphicsResourceGetMappedPointer failed");
 | 
			
		||||
 | 
			
		||||
  // Execute kernel
 | 
			
		||||
  simpleD3DKernel(d_ptr, g_MeshWidth, g_MeshHeight, anim);
 | 
			
		||||
 | 
			
		||||
  // CUDA Map Unmap vertex buffer
 | 
			
		||||
  checkCudaErrors(cudaGraphicsUnmapResources(1, &cuda_VB_resource, 0));
 | 
			
		||||
  getLastCudaError("cudaGraphicsUnmapResource failed");
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
//! Check if the result is correct or write data to file for external
 | 
			
		||||
//! regression testing
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
bool SaveVBResult(int argc, char **argv) {
 | 
			
		||||
  // Lock vertex buffer
 | 
			
		||||
  float *data;
 | 
			
		||||
 | 
			
		||||
  if (FAILED(g_pVB->Lock(0, 0, (void **)&data, 0))) {
 | 
			
		||||
    return false;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Save result
 | 
			
		||||
  if (checkCmdLineFlag(argc, (const char **)argv, "regression")) {
 | 
			
		||||
    // write file for regression test
 | 
			
		||||
    sdkWriteFile<float>("./data/regression.dat", data, sizeof(CUSTOMVERTEX),
 | 
			
		||||
                        0.0f, false);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // unlock
 | 
			
		||||
  if (FAILED(g_pVB->Unlock())) {
 | 
			
		||||
    return false;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  return true;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
// Name: InitD3D9()
 | 
			
		||||
// Desc: Initializes Direct3D9
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
HRESULT InitD3D9(HWND hWnd) {
  // Create the D3D object.
  if (S_OK != Direct3DCreate9Ex(D3D_SDK_VERSION, &g_pD3D)) {
    return E_FAIL;
  }

  D3DADAPTER_IDENTIFIER9 adapterId;
  int device;
  bool bDeviceFound = false;
  printf("\n");

  cudaError cuStatus;

  // Walk the display adapters and stop at the first one CUDA can pair with.
  // On success g_iAdapter is left at that adapter's index.
  for (g_iAdapter = 0; g_iAdapter < g_pD3D->GetAdapterCount(); g_iAdapter++) {
    HRESULT hr = g_pD3D->GetAdapterIdentifier(g_iAdapter, 0, &adapterId);

    if (FAILED(hr)) {
      continue;
    }

    cuStatus = cudaD3D9GetDevice(&device, adapterId.DeviceName);
    // This prints and resets the cudaError to cudaSuccess
    printLastCudaError("cudaD3D9GetDevice failed");

    printf("> Display Device #%d: \"%s\" %s Direct3D9\n", g_iAdapter,
           adapterId.Description,
           (cuStatus == cudaSuccess) ? "supports" : "does not support");

    if (cudaSuccess == cuStatus) {
      bDeviceFound = true;
      STRCPY(device_name, NAME_LEN, adapterId.Description);
      break;
    }
  }

  // Without a CUDA-compatible D3D device there is nothing to demonstrate; the
  // sample reports this and exits with a success status.
  if (!bDeviceFound) {
    printf("\n");
    printf("  No CUDA-compatible Direct3D9 device available\n");
    printf("PASSED\n");
    // release the D3D object
    g_pD3D->Release();
    exit(EXIT_SUCCESS);
  }

  // D3DDISPLAYMODEEX carries its own size; the caller must fill it in before
  // the structure is passed to GetAdapterDisplayModeEx.
  g_d3ddm.Size = sizeof(g_d3ddm);
  g_pD3D->GetAdapterDisplayModeEx(g_iAdapter, &g_d3ddm, NULL);

  // Set up the structure used to create the D3DDevice
  ZeroMemory(&g_d3dpp, sizeof(g_d3dpp));
  g_d3dpp.Windowed = g_bWindowed;
  g_d3dpp.BackBufferCount = 1;
  g_d3dpp.hDeviceWindow = hWnd;
  g_d3dpp.SwapEffect = D3DSWAPEFFECT_DISCARD;
  g_d3dpp.BackBufferFormat = g_d3ddm.Format;
  g_d3dpp.FullScreen_RefreshRateInHz = 0;  // set to 60 for fullscreen, and also
                                           // don't forget to set Windowed to
                                           // FALSE
  g_d3dpp.PresentationInterval =
      D3DPRESENT_INTERVAL_ONE;  // D3DPRESENT_DONOTWAIT;

  g_d3dpp.BackBufferWidth = g_WindowWidth;
  g_d3dpp.BackBufferHeight = g_WindowHeight;

  // Create the D3DDevice
  if (FAILED(g_pD3D->CreateDeviceEx(g_iAdapter, D3DDEVTYPE_HAL, hWnd,
                                    D3DCREATE_HARDWARE_VERTEXPROCESSING,
                                    &g_d3dpp, NULL, &g_pD3DDevice))) {
    return E_FAIL;
  }

  if (FAILED(InitD3D9RenderState())) {
    return E_FAIL;
  }

  return S_OK;
}
 | 
			
		||||
 | 
			
		||||
// Initialize the D3D Rendering State
 | 
			
		||||
HRESULT InitD3D9RenderState() {
  // Disable culling so geometry is drawn whichever way it faces.
  const HRESULT cullResult =
      g_pD3DDevice->SetRenderState(D3DRS_CULLMODE, D3DCULL_NONE);

  if (FAILED(cullResult)) {
    return E_FAIL;
  }

  // Disable D3D lighting: the vertices carry their own colors.
  const HRESULT lightingResult =
      g_pD3DDevice->SetRenderState(D3DRS_LIGHTING, FALSE);

  return FAILED(lightingResult) ? E_FAIL : S_OK;
}
 | 
			
		||||
 | 
			
		||||
HRESULT InitCUDA() {
  // Device that CUDA is about to be bound to; logged for diagnostics.
  IDirect3DDevice9Ex *const d3dDevice = g_pD3DDevice;
  printf("InitCUDA() g_pD3DDevice = %p\n", d3dDevice);

  // Bind a CUDA context to the DX9 device. The call's own return value is not
  // inspected; the getLastCudaError check that follows covers it.
  cudaD3D9SetDirect3DDevice(d3dDevice);
  getLastCudaError("cudaD3D9SetDirect3DDevice failed");

  return S_OK;
}
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
//! RestoreContextResources
 | 
			
		||||
//    - this function restores all of the CUDA/D3D resources and contexts
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
HRESULT RestoreContextResources() {
  // Reinitialize D3D9 resources and CUDA resources/contexts. All three steps
  // are always attempted, in this order, so the side effects are the same
  // whichever step fails.
  const HRESULT hrCuda = InitCUDA();
  const HRESULT hrVertexBuffer = InitVertexBuffer();
  const HRESULT hrRenderState = InitD3D9RenderState();

  // Report the first failure instead of unconditionally claiming success.
  if (FAILED(hrCuda)) {
    return hrCuda;
  }

  if (FAILED(hrVertexBuffer)) {
    return hrVertexBuffer;
  }

  return hrRenderState;
}
 | 
			
		||||
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
// Name: InitVertexBuffer()
 | 
			
		||||
// Desc: Creates the scene geometry (Vertex Buffer)
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
HRESULT InitVertexBuffer() {
  // One CUSTOMVERTEX per mesh point.
  const UINT bufferBytes = g_NumVertices * sizeof(CUSTOMVERTEX);

  // Create the vertex buffer in the default pool.
  const HRESULT created = g_pD3DDevice->CreateVertexBuffer(
      bufferBytes, 0, D3DFVF_CUSTOMVERTEX, D3DPOOL_DEFAULT, &g_pVB, NULL);

  if (FAILED(created)) {
    return E_FAIL;
  }

  // Register the buffer with CUDA so it can later be mapped and written by a
  // kernel. The getLastCudaError check covers the registration call.
  cudaGraphicsD3D9RegisterResource(&cuda_VB_resource, g_pVB,
                                   cudaD3D9RegisterFlagsNone);
  getLastCudaError("cudaGraphicsD3D9RegisterResource failed");

  return S_OK;
}
 | 
			
		||||
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
// Name: FreeVertexBuffer()
 | 
			
		||||
// Desc: Frees the Vertex Buffer resource
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
HRESULT FreeVertexBuffer() {
  // Nothing to do when no vertex buffer is currently held.
  if (g_pVB != NULL) {
    // Unregister the vertex buffer from CUDA before releasing it.
    cudaGraphicsUnregisterResource(cuda_VB_resource);
    getLastCudaError("cudaGraphicsUnregisterResource failed");
    cuda_VB_resource = NULL;

    g_pVB->Release();

    // Clear the pointer so a later call is a no-op. This function runs both
    // when Render() detects a lost device and from Cleanup(); without the
    // reset, the second call would unregister and release stale handles.
    g_pVB = NULL;
  }

  return S_OK;
}
 | 
			
		||||
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
// Name: Cleanup()
 | 
			
		||||
// Desc: Releases all previously initialized objects
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
VOID Cleanup() {
  // Release the vertex buffer and its CUDA registration first. Its handles are
  // cleared here too, so this function stays safe to call again even if
  // FreeVertexBuffer() leaves them untouched.
  FreeVertexBuffer();
  g_pVB = NULL;
  cuda_VB_resource = NULL;

  // Release the device, then the D3D object that created it. Each pointer is
  // reset to NULL so a repeated Cleanup() cannot release an object twice.
  if (g_pD3DDevice != NULL) {
    g_pD3DDevice->Release();
    g_pD3DDevice = NULL;
  }

  if (g_pD3D != NULL) {
    g_pD3D->Release();
    g_pD3D = NULL;
  }
}
 | 
			
		||||
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
// Name: SetupMatrices()
 | 
			
		||||
// Desc: Sets up the world, view, and projection transform matrices.
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
VOID SetupMatrices() {
  // Store a SIMD matrix into a plain 4x4 float matrix and hand it to the
  // device for the given transform slot.
  auto applyTransform = [](D3DTRANSFORMSTATETYPE slot, const XMMATRIX &matrix) {
    XMFLOAT4X4 stored;
    XMStoreFloat4x4(&stored, matrix);
    g_pD3DDevice->SetTransform(slot, (D3DMATRIX *)&stored);
  };

  // World transform: identity, i.e. the mesh is drawn where the kernel put it.
  applyTransform(D3DTS_WORLD, XMMatrixIdentity());

  // View transform, built from an eye point, a look-at point and an up
  // direction: the eye sits three units up and two units back along the
  // z-axis, looks at the origin, and "up" is the y-direction.
  const XMVECTOR eyePosition = {0.0f, 3.0f, -2.0f};
  const XMVECTOR lookAtTarget = {0.0f, 0.0f, 0.0f};
  const XMVECTOR upDirection = {0.0f, 1.0f, 0.0f};
  applyTransform(D3DTS_VIEW,
                 XMMatrixLookAtLH(eyePosition, lookAtTarget, upDirection));

  // Projection transform: perspective with a quarter-pi field of view, an
  // aspect ratio of 1, and near/far clipping planes at 1 and 100 (geometry
  // outside that range is not rendered).
  applyTransform(D3DTS_PROJECTION, XMMatrixPerspectiveFovLH(
                                       (float)XM_PI / 4, 1.0f, 1.0f, 100.0f));
}
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
//! DeviceLostHandler
 | 
			
		||||
//    - this function handles resetting and initialization of the D3D device
 | 
			
		||||
//      in the event this Device gets Lost
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
HRESULT DeviceLostHandler() {
  // Ask the device whether it is currently usable for rendering.
  HRESULT status = g_pD3DDevice->TestCooperativeLevel();

  // Device is fine: nothing to recover.
  if (SUCCEEDED(status)) {
    return status;
  }

  // Truly lost (e.g. a fullscreen device that just lost focus): nothing can
  // be done yet, so report success and let the caller retry later.
  if (status == D3DERR_DEVICELOST) {
    return S_OK;
  }

  // Any failure other than "ready to be reset" is passed back unchanged.
  if (status != D3DERR_DEVICENOTRESET) {
    return status;
  }

  // The device can now be reset. When windowed, re-read the desktop mode and
  // use the same format for the back buffer; this effectively turns off color
  // conversion.
  if (g_bWindowed) {
    g_pD3D->GetAdapterDisplayModeEx(g_iAdapter, &g_d3ddm, NULL);
    g_d3dpp.BackBufferFormat = g_d3ddm.Format;
  }

  status = g_pD3DDevice->Reset(&g_d3dpp);

  if (FAILED(status)) {
    return status;
  }

  // Reset succeeded: restore all hardware resources/state and mark the device
  // as acquired again.
  RestoreContextResources();
  g_bDeviceLost = false;

  return status;
}
 | 
			
		||||
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
// Name: Render()
 | 
			
		||||
// Desc: Draws the scene
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
HRESULT Render() {
  HRESULT hr = S_OK;

  // Recovery path, taken while the device is flagged as lost.
  if (g_bDeviceLost) {
    hr = DeviceLostHandler();

    if (FAILED(hr)) {
      fprintf(stderr, "DeviceLostHandler FAILED returned %08x\n", hr);
      return hr;
    }

    fprintf(stderr, "Render DeviceLost handler\n");

    // Check again whether the device is usable for rendering.
    hr = g_pD3DDevice->TestCooperativeLevel();

    if (FAILED(hr)) {
      fprintf(stderr,
              "TestCooperativeLevel = %08x failed, will attempt to reset\n",
              hr);

      // Truly lost (e.g. a fullscreen device that just lost focus): wait
      // until it comes back and try again on the next call.
      if (hr == D3DERR_DEVICELOST) {
        fprintf(
            stderr,
            "TestCooperativeLevel = %08x DeviceLost, will retry next call\n",
            hr);
        return S_OK;
      }

      // Any failure other than "ready to be reset" is passed back unchanged.
      if (hr != D3DERR_DEVICENOTRESET) {
        return hr;
      }

      fprintf(stderr,
              "TestCooperativeLevel = %08x will try to RESET the device\n",
              hr);

      // When windowed, re-read the desktop mode and use the same format for
      // the back buffer; this effectively turns off color conversion.
      if (g_bWindowed) {
        g_pD3D->GetAdapterDisplayModeEx(g_iAdapter, &g_d3ddm, NULL);
        g_d3dpp.BackBufferFormat = g_d3ddm.Format;
      }

      // Try to reset the device.
      hr = g_pD3DDevice->Reset(&g_d3dpp);

      if (FAILED(hr)) {
        fprintf(stderr, "TestCooperativeLevel = %08x RESET device FAILED\n",
                hr);
        return hr;
      }

      fprintf(stderr, "TestCooperativeLevel = %08x RESET device SUCCESS!\n",
              hr);

      // Reinitialize D3D9 resources, CUDA resources/contexts.
      InitCUDA();
      InitVertexBuffer();
      InitD3D9RenderState();

      fprintf(stderr, "TestCooperativeLevel = %08x INIT device SUCCESS!\n",
              hr);

      // The device has been re-acquired; drawing resumes on the next call.
      g_bDeviceLost = false;

      return hr;
    }
  }

  // Drawing path, taken only while the device is not flagged as lost.
  if (!g_bDeviceLost) {
    // Clear the backbuffer to black.
    g_pD3DDevice->Clear(0, NULL, D3DCLEAR_TARGET, D3DCOLOR_XRGB(0, 0, 0), 1.0f,
                        0);

    // Let CUDA update the vertex positions for the current animation time.
    runCuda();

    if (SUCCEEDED(g_pD3DDevice->BeginScene())) {
      // Set the world, view, and projection matrices.
      SetupMatrices();

      // Draw the vertex buffer contents as a point list.
      g_pD3DDevice->SetStreamSource(0, g_pVB, 0, sizeof(CUSTOMVERTEX));
      g_pD3DDevice->SetFVF(D3DFVF_CUSTOMVERTEX);
      g_pD3DDevice->DrawPrimitive(D3DPT_POINTLIST, 0, g_NumVertices);

      g_pD3DDevice->EndScene();
    }

    // Present the backbuffer contents to the display.
    hr = g_pD3DDevice->Present(NULL, NULL, NULL, NULL);

    // A lost device is detected here: flag it and drop the vertex buffer so
    // the recovery path can recreate it after the reset.
    if (hr == D3DERR_DEVICELOST) {
      fprintf(stderr, "drawScene Present = %08x detected D3D DeviceLost\n", hr);
      g_bDeviceLost = true;

      FreeVertexBuffer();
    }
  }

  // Advance the animation time passed to the kernel.
  anim += 0.1f;

  return hr;
}
 | 
			
		||||
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
// Name: MsgProc()
 | 
			
		||||
// Desc: The window's message handler
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
LRESULT WINAPI MsgProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam) {
  // The sample shuts down when its window is destroyed or when the key with
  // virtual-key code 27 is pressed.
  const bool windowDestroyed = (msg == WM_DESTROY);
  const bool quitKeyPressed = (msg == WM_KEYDOWN) && (wParam == 27);

  if (windowDestroyed || quitKeyPressed) {
    Cleanup();

    PostQuitMessage(0);
    return 0;
  }

  // Everything else gets the default window procedure.
  return DefWindowProc(hWnd, msg, wParam, lParam);
}
 | 
			
		||||
										
											Binary file not shown.
										
									
								
							@ -1,79 +0,0 @@
 | 
			
		||||
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
 | 
			
		||||
 *
 | 
			
		||||
 * Redistribution and use in source and binary forms, with or without
 | 
			
		||||
 * modification, are permitted provided that the following conditions
 | 
			
		||||
 * are met:
 | 
			
		||||
 *  * Redistributions of source code must retain the above copyright
 | 
			
		||||
 *    notice, this list of conditions and the following disclaimer.
 | 
			
		||||
 *  * Redistributions in binary form must reproduce the above copyright
 | 
			
		||||
 *    notice, this list of conditions and the following disclaimer in the
 | 
			
		||||
 *    documentation and/or other materials provided with the distribution.
 | 
			
		||||
 *  * Neither the name of NVIDIA CORPORATION nor the names of its
 | 
			
		||||
 *    contributors may be used to endorse or promote products derived
 | 
			
		||||
 *    from this software without specific prior written permission.
 | 
			
		||||
 *
 | 
			
		||||
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 | 
			
		||||
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 | 
			
		||||
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 | 
			
		||||
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 | 
			
		||||
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 | 
			
		||||
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 | 
			
		||||
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 | 
			
		||||
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 | 
			
		||||
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 | 
			
		||||
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 | 
			
		||||
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
// This example demonstrates how to use the CUDA Direct3D bindings with the
 | 
			
		||||
// runtime API.
 | 
			
		||||
// Device code.
 | 
			
		||||
 | 
			
		||||
#ifndef _SIMPLED3D_KERNEL_CU_
 | 
			
		||||
#define _SIMPLED3D_KERNEL_CU_
 | 
			
		||||
 | 
			
		||||
// includes, C string library
 | 
			
		||||
#include <stdio.h>
 | 
			
		||||
#include <stdlib.h>
 | 
			
		||||
#include <string.h>
 | 
			
		||||
 | 
			
		||||
///////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
//! Simple kernel to modify vertex positions in sine wave pattern
 | 
			
		||||
//! @param pos  pos in global memory
 | 
			
		||||
///////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
__global__ void kernel(float4 *pos, unsigned int width, unsigned int height,
                       float time) {
  // One thread per mesh vertex, addressed by a 2D grid of 2D blocks.
  unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
  unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;

  // The grid is rounded up to whole blocks, so threads past the mesh edge
  // must not write.
  if (x >= width || y >= height) {
    return;
  }

  // calculate uv coordinates, remapped from [0, 1) to [-1, 1)
  float u = x / (float)width;
  float v = y / (float)height;
  u = u * 2.0f - 1.0f;
  v = v * 2.0f - 1.0f;

  // calculate simple sine wave pattern
  float freq = 4.0f;
  float w = sinf(u * freq + time) * cosf(v * freq + time) * 0.5f;

  // write output vertex: position (u, w, v); the fourth component carries the
  // raw bit pattern 0xff00ff00 rather than a meaningful float value
  pos[y * width + x] = make_float4(u, w, v, __int_as_float(0xff00ff00));
}

///////////////////////////////////////////////////////////////////////////////
//! Host-side launcher for kernel()
//! @param pos     device pointer to width * height float4 vertices
//! @param width   mesh width in vertices
//! @param height  mesh height in vertices
//! @param time    animation time offset fed to the sine wave
//! Launches 8x8 thread blocks on the default stream. A launch failure
//! (including the invalid configuration that results from a zero width or
//! height) is reported on stdout and is not returned to the caller.
///////////////////////////////////////////////////////////////////////////////
extern "C" void simpleD3DKernel(float4 *pos, unsigned int width,
                                unsigned int height, float time) {
  const dim3 block(8, 8, 1);

  // Round the grid up so that meshes whose dimensions are not multiples of
  // the block size are still fully covered; the kernel guards the overhang.
  const dim3 grid((width + block.x - 1) / block.x,
                  (height + block.y - 1) / block.y, 1);

  kernel<<<grid, block>>>(pos, width, height, time);

  const cudaError_t error = cudaGetLastError();

  if (error != cudaSuccess) {
    printf("kernel() failed to launch error = %d\n", error);
  }
}
 | 
			
		||||
 | 
			
		||||
#endif  // #ifndef _SIMPLED3D_KERNEL_CU_
 | 
			
		||||
@ -1,46 +0,0 @@
 | 
			
		||||
################################################################################
 | 
			
		||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
 | 
			
		||||
#
 | 
			
		||||
# Redistribution and use in source and binary forms, with or without
 | 
			
		||||
# modification, are permitted provided that the following conditions
 | 
			
		||||
# are met:
 | 
			
		||||
#  * Redistributions of source code must retain the above copyright
 | 
			
		||||
#    notice, this list of conditions and the following disclaimer.
 | 
			
		||||
#  * Redistributions in binary form must reproduce the above copyright
 | 
			
		||||
#    notice, this list of conditions and the following disclaimer in the
 | 
			
		||||
#    documentation and/or other materials provided with the distribution.
 | 
			
		||||
#  * Neither the name of NVIDIA CORPORATION nor the names of its
 | 
			
		||||
#    contributors may be used to endorse or promote products derived
 | 
			
		||||
#    from this software without specific prior written permission.
 | 
			
		||||
#
 | 
			
		||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 | 
			
		||||
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 | 
			
		||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 | 
			
		||||
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 | 
			
		||||
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 | 
			
		||||
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 | 
			
		||||
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 | 
			
		||||
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 | 
			
		||||
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 | 
			
		||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 | 
			
		||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
			
		||||
#
 | 
			
		||||
################################################################################
 | 
			
		||||
#
 | 
			
		||||
# Makefile project (only supported on Mac OS X and Linux platforms)
 | 
			
		||||
#
 | 
			
		||||
################################################################################
 | 
			
		||||
 | 
			
		||||
# Target rules
#
# None of these targets produces a file with the same name. Declaring them
# phony keeps a stray file called e.g. "build" or "clean" in this directory
# from making the target look up to date.
.PHONY: all build run testrun clean clobber

all: build

# The sample is Windows-only, so building here just reports that it is waived.
build:
	$(info >>> WARNING - simpleD3D9Texture is not supported on Linux - waiving sample <<<)

run: build

testrun: build

clean:

clobber: clean
 | 
			
		||||
@ -1,49 +0,0 @@
 | 
			
		||||
# simpleD3D9Texture - Simple D3D9 Texture
 | 
			
		||||
 | 
			
		||||
## Description
 | 
			
		||||
 | 
			
		||||
Simple program which demonstrates Direct3D9 Texture interoperability with CUDA.  The program creates a number of D3D9 Textures (2D, 3D, and CubeMap) which are written to from CUDA kernels. Direct3D then renders the results on the screen.  A Direct3D capable device is required.
 | 
			
		||||
 | 
			
		||||
## Key Concepts
 | 
			
		||||
 | 
			
		||||
Graphics Interop, Texture
 | 
			
		||||
 | 
			
		||||
## Supported SM Architectures
 | 
			
		||||
 | 
			
		||||
[SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.9 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 | 
			
		||||
 | 
			
		||||
## Supported OSes
 | 
			
		||||
 | 
			
		||||
Windows
 | 
			
		||||
 | 
			
		||||
## Supported CPU Architecture
 | 
			
		||||
 | 
			
		||||
x86_64
 | 
			
		||||
 | 
			
		||||
## CUDA APIs involved
 | 
			
		||||
 | 
			
		||||
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
 | 
			
		||||
cudaGraphicsUnmapResources, cudaMalloc, cudaMallocPitch, cudaFree, cudaGetLastError, cudaGraphicsMapResources, cudaMemset, cudaGraphicsUnregisterResource, cudaGraphicsSubResourceGetMappedArray
 | 
			
		||||
 | 
			
		||||
## Dependencies needed to build/run
 | 
			
		||||
[DirectX](../../../README.md#directx)
 | 
			
		||||
 | 
			
		||||
## Prerequisites
 | 
			
		||||
 | 
			
		||||
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 | 
			
		||||
Make sure the dependencies mentioned in the [Dependencies](#dependencies-needed-to-buildrun) section above are installed.
 | 
			
		||||
 | 
			
		||||
## Build and Run
 | 
			
		||||
 | 
			
		||||
### Windows
 | 
			
		||||
The Windows samples are built using the Visual Studio IDE. Solution files (.sln) are provided for each supported version of Visual Studio, using the format:
 | 
			
		||||
```
 | 
			
		||||
*_vs<version>.sln - for Visual Studio <version>
 | 
			
		||||
```
 | 
			
		||||
Each individual sample has its own set of solution files in its directory.
 | 
			
		||||
 | 
			
		||||
To build/examine all the samples at once, the complete solution files should be used. To build/examine a single sample, the individual sample solution files should be used.
 | 
			
		||||
> **Note:** Some samples require that the Microsoft DirectX SDK (June 2010 or newer) be installed and that the VC++ directory paths are properly set up (**Tools > Options...**). Check the DirectX Dependencies section for details.
 | 
			
		||||
 | 
			
		||||
## References (for more details)
 | 
			
		||||
 | 
			
		||||
										
											Binary file not shown.
										
									
								
							
										
											Binary file not shown.
										
									
								
							| 
		 Before Width: | Height: | Size: 172 KiB  | 
										
											Binary file not shown.
										
									
								
							| 
		 Before Width: | Height: | Size: 67 KiB  | 
										
											Binary file not shown.
										
									
								
							| 
		 Before Width: | Height: | Size: 26 KiB  | 
@ -1,884 +0,0 @@
 | 
			
		||||
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
 | 
			
		||||
 *
 | 
			
		||||
 * Redistribution and use in source and binary forms, with or without
 | 
			
		||||
 * modification, are permitted provided that the following conditions
 | 
			
		||||
 * are met:
 | 
			
		||||
 *  * Redistributions of source code must retain the above copyright
 | 
			
		||||
 *    notice, this list of conditions and the following disclaimer.
 | 
			
		||||
 *  * Redistributions in binary form must reproduce the above copyright
 | 
			
		||||
 *    notice, this list of conditions and the following disclaimer in the
 | 
			
		||||
 *    documentation and/or other materials provided with the distribution.
 | 
			
		||||
 *  * Neither the name of NVIDIA CORPORATION nor the names of its
 | 
			
		||||
 *    contributors may be used to endorse or promote products derived
 | 
			
		||||
 *    from this software without specific prior written permission.
 | 
			
		||||
 *
 | 
			
		||||
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 | 
			
		||||
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 | 
			
		||||
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 | 
			
		||||
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 | 
			
		||||
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 | 
			
		||||
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 | 
			
		||||
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 | 
			
		||||
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 | 
			
		||||
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 | 
			
		||||
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 | 
			
		||||
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
/* This example demonstrates how to use the CUDA Direct3D bindings to
 | 
			
		||||
 * transfer data between CUDA and DX9 2D, CubeMap, and Volume Textures.
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
#include <stdlib.h>
 | 
			
		||||
#include <stdio.h>
 | 
			
		||||
#include <string.h>
 | 
			
		||||
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
 | 
			
		||||
#define WINDOWS_LEAN_AND_MEAN
 | 
			
		||||
#include <windows.h>
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
// These headers include all the necessary D3D9 and CUDA includes
 | 
			
		||||
#include <cuda_runtime_api.h>
 | 
			
		||||
#include <cuda_d3d9_interop.h>
 | 
			
		||||
 | 
			
		||||
// includes, project
 | 
			
		||||
#include <rendercheck_d3d9.h>
 | 
			
		||||
#include <helper_cuda.h>
 | 
			
		||||
#include <helper_functions.h>  // includes cuda.h and cuda_runtime_api.h
 | 
			
		||||
 | 
			
		||||
#include <cassert>
 | 
			
		||||
 | 
			
		||||
#define MAX_EPSILON 10
 | 
			
		||||
 | 
			
		||||
static char *SDK_name = "simpleD3D9Texture";
 | 
			
		||||
 | 
			
		||||
bool g_bDone = false;
 | 
			
		||||
bool g_bPassed = true;
 | 
			
		||||
IDirect3D9Ex *g_pD3D;  // Used to create the D3DDevice
 | 
			
		||||
unsigned int g_iAdapter;
 | 
			
		||||
IDirect3DDevice9Ex *g_pD3DDevice;
 | 
			
		||||
 | 
			
		||||
D3DDISPLAYMODEEX g_d3ddm;
 | 
			
		||||
D3DPRESENT_PARAMETERS g_d3dpp;
 | 
			
		||||
 | 
			
		||||
bool g_bWindowed = true;
 | 
			
		||||
bool g_bDeviceLost = false;
 | 
			
		||||
 | 
			
		||||
const unsigned int g_WindowWidth = 720;
 | 
			
		||||
const unsigned int g_WindowHeight = 720;
 | 
			
		||||
 | 
			
		||||
int g_iFrameToCompare = 10;
 | 
			
		||||
 | 
			
		||||
int *pArgc = NULL;
 | 
			
		||||
char **pArgv = NULL;
 | 
			
		||||
 | 
			
		||||
// Data structure for 2D texture shared between DX9 and CUDA
 | 
			
		||||
struct {
 | 
			
		||||
  IDirect3DTexture9 *pTexture;
 | 
			
		||||
  cudaGraphicsResource *cudaResource;
 | 
			
		||||
  void *cudaLinearMemory;
 | 
			
		||||
  size_t pitch;
 | 
			
		||||
  int width;
 | 
			
		||||
  int height;
 | 
			
		||||
} g_texture_2d;
 | 
			
		||||
 | 
			
		||||
// Data structure for cube texture shared between DX9 and CUDA
 | 
			
		||||
struct {
 | 
			
		||||
  IDirect3DCubeTexture9 *pTexture;
 | 
			
		||||
  cudaGraphicsResource *cudaResource;
 | 
			
		||||
  void *cudaLinearMemory;
 | 
			
		||||
  size_t pitch;
 | 
			
		||||
  int size;
 | 
			
		||||
} g_texture_cube;
 | 
			
		||||
 | 
			
		||||
// Data structure for volume textures shared between DX9 and CUDA
 | 
			
		||||
struct {
 | 
			
		||||
  IDirect3DVolumeTexture9 *pTexture;
 | 
			
		||||
  cudaGraphicsResource *cudaResource;
 | 
			
		||||
  void *cudaLinearMemory;
 | 
			
		||||
  size_t pitch;
 | 
			
		||||
  int width;
 | 
			
		||||
  int height;
 | 
			
		||||
  int depth;
 | 
			
		||||
} g_texture_vol;
 | 
			
		||||
 | 
			
		||||
// C-linkage entry points that launch the CUDA kernels. In each case `surface`
// is the linear device buffer to fill, `pitch` its row pitch and `t` the
// animation time.

// Fills a width x height 2D surface.
extern "C" bool cuda_texture_2d(void *surface, size_t width, size_t height,
                                size_t pitch, float t);

// Fills one face (`face`) of a cube map.
extern "C" bool cuda_texture_cube(void *surface, int width, int height,
                                  size_t pitch, int face, float t);

// Fills a width x height x depth volume; `pitchslice` is the pitch of one
// depth slice.
extern "C" bool cuda_texture_volume(void *surface, int width, int height,
                                    int depth, size_t pitch, size_t pitchslice,
                                    float t);
 | 
			
		||||
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
// Forward declarations
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
HRESULT InitD3D9(HWND hWnd);
 | 
			
		||||
HRESULT InitCUDA();
 | 
			
		||||
HRESULT InitTextures();
 | 
			
		||||
HRESULT ReleaseTextures();
 | 
			
		||||
HRESULT RegisterD3D9ResourceWithCUDA();
 | 
			
		||||
HRESULT DeviceLostHandler();
 | 
			
		||||
 | 
			
		||||
void RunKernels();
 | 
			
		||||
HRESULT DrawScene();
 | 
			
		||||
void Cleanup();
 | 
			
		||||
void RunCUDA();
 | 
			
		||||
 | 
			
		||||
LRESULT WINAPI MsgProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam);
 | 
			
		||||
 | 
			
		||||
// Capacity, in chars, of the device_name buffer
#define NAME_LEN 512

// Description of the display adapter selected in InitD3D9(); printed by
// main() just before the sample exits
char device_name[NAME_LEN];
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Program main
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
int main(int argc, char *argv[]) {
 | 
			
		||||
  char *ref_file = NULL;
 | 
			
		||||
 | 
			
		||||
  pArgc = &argc;
 | 
			
		||||
  pArgv = argv;
 | 
			
		||||
 | 
			
		||||
  printf("[%s] - Starting...\n", SDK_name);
 | 
			
		||||
 | 
			
		||||
  // command line options
 | 
			
		||||
  if (argc > 1) {
 | 
			
		||||
    // automatied build testing harness
 | 
			
		||||
    if (checkCmdLineFlag(argc, (const char **)argv, "file"))
 | 
			
		||||
      getCmdLineArgumentString(argc, (const char **)argv, "file", &ref_file);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
//
 | 
			
		||||
// create window
 | 
			
		||||
//
 | 
			
		||||
// Register the window class
 | 
			
		||||
#if 1
 | 
			
		||||
  WNDCLASSEX wc = {sizeof(WNDCLASSEX),          CS_CLASSDC, MsgProc, 0L,   0L,
 | 
			
		||||
                   GetModuleHandle(NULL),       NULL,       NULL,    NULL, NULL,
 | 
			
		||||
                   "CUDA/D3D9 Texture InterOP", NULL};
 | 
			
		||||
  RegisterClassEx(&wc);
 | 
			
		||||
 | 
			
		||||
  int xBorder = ::GetSystemMetrics(SM_CXSIZEFRAME);
 | 
			
		||||
  int yMenu = ::GetSystemMetrics(SM_CYMENU);
 | 
			
		||||
  int yBorder = ::GetSystemMetrics(SM_CYSIZEFRAME);
 | 
			
		||||
 | 
			
		||||
  // Create the application's window (padding by window border for uniform BB
 | 
			
		||||
  // sizes across OSs)
 | 
			
		||||
  HWND hWnd = CreateWindow(
 | 
			
		||||
      wc.lpszClassName, "CUDA/D3D9 Texture InterOP", WS_OVERLAPPEDWINDOW, 0, 0,
 | 
			
		||||
      g_WindowWidth + 2 * xBorder, g_WindowHeight + 2 * yBorder + yMenu, NULL,
 | 
			
		||||
      NULL, wc.hInstance, NULL);
 | 
			
		||||
 | 
			
		||||
#else
 | 
			
		||||
  static WNDCLASSEX wc = {
 | 
			
		||||
      sizeof(WNDCLASSEX),    CS_CLASSDC, MsgProc, 0L,   0L,
 | 
			
		||||
      GetModuleHandle(NULL), NULL,       NULL,    NULL, NULL,
 | 
			
		||||
      "CudaD3D9Tex",         NULL};
 | 
			
		||||
  RegisterClassEx(&wc);
 | 
			
		||||
  HWND hWnd = CreateWindow("CudaD3D9Tex", "CUDA D3D9 Texture Interop",
 | 
			
		||||
                           WS_OVERLAPPEDWINDOW, 0, 0, 800, 320,
 | 
			
		||||
                           GetDesktopWindow(), NULL, wc.hInstance, NULL);
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
  ShowWindow(hWnd, SW_SHOWDEFAULT);
 | 
			
		||||
  UpdateWindow(hWnd);
 | 
			
		||||
 | 
			
		||||
  // Initialize Direct3D
 | 
			
		||||
  if (SUCCEEDED(InitD3D9(hWnd)) && SUCCEEDED(InitCUDA()) &&
 | 
			
		||||
      SUCCEEDED(InitTextures())) {
 | 
			
		||||
    if (!g_bDeviceLost) {
 | 
			
		||||
      RegisterD3D9ResourceWithCUDA();
 | 
			
		||||
    }
 | 
			
		||||
  } else {
 | 
			
		||||
    printf("\n");
 | 
			
		||||
    printf("  No CUDA-compatible Direct3D9 device available\n");
 | 
			
		||||
    printf("WAIVED\n");
 | 
			
		||||
    exit(EXIT_WAIVED);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  //
 | 
			
		||||
  // the main loop
 | 
			
		||||
  //
 | 
			
		||||
  while (false == g_bDone) {
 | 
			
		||||
    RunCUDA();
 | 
			
		||||
    DrawScene();
 | 
			
		||||
 | 
			
		||||
    //
 | 
			
		||||
    // handle I/O
 | 
			
		||||
    //
 | 
			
		||||
    MSG msg;
 | 
			
		||||
    ZeroMemory(&msg, sizeof(msg));
 | 
			
		||||
 | 
			
		||||
    while (msg.message != WM_QUIT) {
 | 
			
		||||
      if (PeekMessage(&msg, NULL, 0U, 0U, PM_REMOVE)) {
 | 
			
		||||
        TranslateMessage(&msg);
 | 
			
		||||
        DispatchMessage(&msg);
 | 
			
		||||
      } else {
 | 
			
		||||
        RunCUDA();
 | 
			
		||||
        DrawScene();
 | 
			
		||||
 | 
			
		||||
        if (ref_file) {
 | 
			
		||||
          for (int count = 0; count < g_iFrameToCompare; count++) {
 | 
			
		||||
            RunCUDA();
 | 
			
		||||
            DrawScene();
 | 
			
		||||
          }
 | 
			
		||||
 | 
			
		||||
          const char *cur_image_path = "simpleD3D9Texture.ppm";
 | 
			
		||||
 | 
			
		||||
          // Save a reference of our current test run image
 | 
			
		||||
          CheckRenderD3D9::BackbufferToPPM(g_pD3DDevice, cur_image_path);
 | 
			
		||||
 | 
			
		||||
          // compare to offical reference image, printing PASS or FAIL.
 | 
			
		||||
          g_bPassed = CheckRenderD3D9::PPMvsPPM(cur_image_path, ref_file,
 | 
			
		||||
                                                argv[0], MAX_EPSILON, 0.15f);
 | 
			
		||||
 | 
			
		||||
          g_bDone = true;
 | 
			
		||||
 | 
			
		||||
          Cleanup();
 | 
			
		||||
          PostQuitMessage(0);
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  // Unregister windows class
 | 
			
		||||
  UnregisterClass(wc.lpszClassName, wc.hInstance);
 | 
			
		||||
 | 
			
		||||
  //
 | 
			
		||||
  // and exit
 | 
			
		||||
  //
 | 
			
		||||
  printf("> %s running on %s exiting...\n", SDK_name, device_name);
 | 
			
		||||
 | 
			
		||||
  exit(g_bPassed ? EXIT_SUCCESS : EXIT_FAILURE);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
// Name: InitD3D9()
 | 
			
		||||
// Desc: Initializes Direct3D9
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
HRESULT InitD3D9(HWND hWnd) {
 | 
			
		||||
  // Create the D3D object.
 | 
			
		||||
  if (S_OK != Direct3DCreate9Ex(D3D_SDK_VERSION, &g_pD3D)) {
 | 
			
		||||
    return E_FAIL;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  D3DADAPTER_IDENTIFIER9 adapterId;
 | 
			
		||||
  int device;
 | 
			
		||||
  bool bDeviceFound = false;
 | 
			
		||||
  printf("\n");
 | 
			
		||||
 | 
			
		||||
  cudaError cuStatus;
 | 
			
		||||
 | 
			
		||||
  for (g_iAdapter = 0; g_iAdapter < g_pD3D->GetAdapterCount(); g_iAdapter++) {
 | 
			
		||||
    HRESULT hr = g_pD3D->GetAdapterIdentifier(g_iAdapter, 0, &adapterId);
 | 
			
		||||
 | 
			
		||||
    if (FAILED(hr)) {
 | 
			
		||||
      continue;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    cuStatus = cudaD3D9GetDevice(&device, adapterId.DeviceName);
 | 
			
		||||
    // This prints and resets the cudaError to cudaSuccess
 | 
			
		||||
    printLastCudaError("cudaD3D9GetDevice failed");
 | 
			
		||||
 | 
			
		||||
    printf("> Display Device #%d: \"%s\" %s Direct3D9\n", g_iAdapter,
 | 
			
		||||
           adapterId.Description,
 | 
			
		||||
           (cuStatus == cudaSuccess) ? "supports" : "does not support");
 | 
			
		||||
 | 
			
		||||
    if (cudaSuccess == cuStatus) {
 | 
			
		||||
      bDeviceFound = true;
 | 
			
		||||
      STRCPY(device_name, NAME_LEN, adapterId.Description);
 | 
			
		||||
      break;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // we check to make sure we have found a cuda-compatible D3D device to work on
 | 
			
		||||
  if (!bDeviceFound) {
 | 
			
		||||
    printf("\n");
 | 
			
		||||
    printf("  No CUDA-compatible Direct3D9 device available\n");
 | 
			
		||||
    printf("PASSED\n");
 | 
			
		||||
    // destroy the D3D device
 | 
			
		||||
    g_pD3D->Release();
 | 
			
		||||
    exit(EXIT_SUCCESS);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Create the D3D Display Device
 | 
			
		||||
  RECT rc;
 | 
			
		||||
  GetClientRect(hWnd, &rc);
 | 
			
		||||
  D3DDISPLAYMODE d3ddm;
 | 
			
		||||
  g_pD3D->GetAdapterDisplayMode(g_iAdapter, &d3ddm);
 | 
			
		||||
  D3DPRESENT_PARAMETERS d3dpp;
 | 
			
		||||
  ZeroMemory(&d3dpp, sizeof(d3dpp));
 | 
			
		||||
  d3dpp.Windowed = TRUE;
 | 
			
		||||
  d3dpp.BackBufferCount = 1;
 | 
			
		||||
  d3dpp.SwapEffect = D3DSWAPEFFECT_DISCARD;
 | 
			
		||||
  d3dpp.hDeviceWindow = hWnd;
 | 
			
		||||
  // d3dpp.BackBufferWidth = g_bQAReadback?g_WindowWidth:(rc.right - rc.left);
 | 
			
		||||
  // d3dpp.BackBufferHeight = g_bQAReadback?g_WindowHeight:(rc.bottom - rc.top);
 | 
			
		||||
  d3dpp.BackBufferWidth = g_WindowWidth;
 | 
			
		||||
  d3dpp.BackBufferHeight = g_WindowHeight;
 | 
			
		||||
 | 
			
		||||
  d3dpp.BackBufferFormat = d3ddm.Format;
 | 
			
		||||
 | 
			
		||||
  if (FAILED(g_pD3D->CreateDeviceEx(g_iAdapter, D3DDEVTYPE_HAL, hWnd,
 | 
			
		||||
                                    D3DCREATE_HARDWARE_VERTEXPROCESSING, &d3dpp,
 | 
			
		||||
                                    NULL, &g_pD3DDevice))) {
 | 
			
		||||
    return E_FAIL;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // We clear the back buffer
 | 
			
		||||
  g_pD3DDevice->BeginScene();
 | 
			
		||||
  g_pD3DDevice->Clear(0, NULL, D3DCLEAR_TARGET, 0, 1.0f, 0);
 | 
			
		||||
  g_pD3DDevice->EndScene();
 | 
			
		||||
 | 
			
		||||
  return S_OK;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
//-----------------------------------------------------------------------------
// Name: InitCUDA()
// Desc: Binds CUDA to the Direct3D9 device stored in g_pD3DDevice
//
// Returns: always S_OK; a CUDA error is handled by getLastCudaError().
//-----------------------------------------------------------------------------
HRESULT InitCUDA() {
  // %p requires a void pointer argument, hence the explicit cast
  printf("InitCUDA() g_pD3DDevice = %p\n", (void *)g_pD3DDevice);

  // Now we need to bind a CUDA context to the DX9 device
  // This is the CUDA 2.0 DX9 interface (required for Windows XP and Vista)
  cudaD3D9SetDirect3DDevice(g_pD3DDevice);
  getLastCudaError("cudaD3D9SetDirect3DDevice failed");

  return S_OK;
}
 | 
			
		||||
 | 
			
		||||
//-----------------------------------------------------------------------------
// Name: RegisterD3D9ResourceWithCUDA()
// Desc: Registers the three Direct3D textures with CUDA and allocates a linear
//       device buffer for each of them.
//
// For every texture (2D, cube, volume) this
//   1. registers pTexture with CUDA, storing the handle in cudaResource,
//   2. allocates cudaLinearMemory and records its row pitch,
//   3. fills the buffer with the byte value 1.
// Every CUDA call is followed by getLastCudaError().
//
// Returns: always S_OK.
//-----------------------------------------------------------------------------
HRESULT RegisterD3D9ResourceWithCUDA() {
  // 2D
  // register the Direct3D resources that we'll use
  // we'll read from and write to g_texture_2d, so don't set any special map
  // flags for it
  cudaGraphicsD3D9RegisterResource(&g_texture_2d.cudaResource,
                                   g_texture_2d.pTexture,
                                   cudaGraphicsRegisterFlagsNone);
  getLastCudaError("cudaGraphicsD3D9RegisterResource (g_texture_2d) failed");
  // cuda cannot write into the texture directly : the texture is seen as a
  // cudaArray and can only be mapped as a texture
  // Create a buffer so that cuda can write into it
  // pixel fmt is D3DFMT_A32B32G32R32F (four floats per texel)
  cudaMallocPitch(&g_texture_2d.cudaLinearMemory, &g_texture_2d.pitch,
                  g_texture_2d.width * sizeof(float) * 4, g_texture_2d.height);
  getLastCudaError("cudaMallocPitch (g_texture_2d) failed");
  cudaMemset(g_texture_2d.cudaLinearMemory, 1,
             g_texture_2d.pitch * g_texture_2d.height);
  getLastCudaError("cudaMemset (g_texture_2d) failed");

  // CUBE
  cudaGraphicsD3D9RegisterResource(&g_texture_cube.cudaResource,
                                   g_texture_cube.pTexture,
                                   cudaGraphicsRegisterFlagsNone);
  getLastCudaError("cudaGraphicsD3D9RegisterResource (g_texture_cube) failed");
  // create the buffer. pixel fmt is D3DFMT_A8R8G8B8 (4 bytes per texel)
  cudaMallocPitch(&g_texture_cube.cudaLinearMemory, &g_texture_cube.pitch,
                  g_texture_cube.size * 4, g_texture_cube.size);
  getLastCudaError("cudaMallocPitch (g_texture_cube) failed");
  cudaMemset(g_texture_cube.cudaLinearMemory, 1,
             g_texture_cube.pitch * g_texture_cube.size);
  getLastCudaError("cudaMemset (g_texture_cube) failed");

  // 3D
  cudaGraphicsD3D9RegisterResource(&g_texture_vol.cudaResource,
                                   g_texture_vol.pTexture,
                                   cudaGraphicsRegisterFlagsNone);
  getLastCudaError("cudaGraphicsD3D9RegisterResource (g_texture_vol) failed");
  // create the buffer. pixel fmt is D3DFMT_A8R8G8B8 (4 bytes per texel)
  // The volume buffer is allocated with plain cudaMalloc rather than
  // cudaMallocPitch, so rows are tightly packed and the pitch is exactly
  // width * 4. Sizes are computed in size_t.
  g_texture_vol.pitch = (size_t)g_texture_vol.width * 4;
  cudaMalloc(&g_texture_vol.cudaLinearMemory,
             g_texture_vol.pitch * g_texture_vol.height * g_texture_vol.depth);
  getLastCudaError("cudaMalloc (g_texture_vol) failed");
  cudaMemset(g_texture_vol.cudaLinearMemory, 1,
             g_texture_vol.pitch * g_texture_vol.height * g_texture_vol.depth);
  getLastCudaError("cudaMemset (g_texture_vol) failed");

  return S_OK;
}
 | 
			
		||||
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
// Name: InitTextures()
 | 
			
		||||
// Desc: Initializes Direct3D Textures (allocation and initialization)
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
HRESULT InitTextures() {
 | 
			
		||||
  //
 | 
			
		||||
  // create the D3D resources we'll be using
 | 
			
		||||
  //
 | 
			
		||||
 | 
			
		||||
  // 2D texture
 | 
			
		||||
  g_texture_2d.width = 256;
 | 
			
		||||
  g_texture_2d.height = 256;
 | 
			
		||||
 | 
			
		||||
  if (FAILED(g_pD3DDevice->CreateTexture(
 | 
			
		||||
          g_texture_2d.width, g_texture_2d.height, 1, 0, D3DFMT_A32B32G32R32F,
 | 
			
		||||
          D3DPOOL_DEFAULT, &g_texture_2d.pTexture, NULL))) {
 | 
			
		||||
    return E_FAIL;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // cube texture
 | 
			
		||||
  g_texture_cube.size = 64;
 | 
			
		||||
 | 
			
		||||
  if (FAILED(g_pD3DDevice->CreateCubeTexture(g_texture_cube.size, 1, 0,
 | 
			
		||||
                                             D3DFMT_A8R8G8B8, D3DPOOL_DEFAULT,
 | 
			
		||||
                                             &g_texture_cube.pTexture, NULL))) {
 | 
			
		||||
    return E_FAIL;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // 3D texture
 | 
			
		||||
  g_texture_vol.width = 64;
 | 
			
		||||
  g_texture_vol.height = 64;
 | 
			
		||||
  g_texture_vol.depth = 32;
 | 
			
		||||
 | 
			
		||||
  if (FAILED(g_pD3DDevice->CreateVolumeTexture(
 | 
			
		||||
          g_texture_vol.width, g_texture_vol.height, g_texture_vol.depth, 1, 0,
 | 
			
		||||
          D3DFMT_A8R8G8B8, D3DPOOL_DEFAULT, &g_texture_vol.pTexture, NULL))) {
 | 
			
		||||
    return E_FAIL;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  return S_OK;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
// Name: ReleaseTextures()
 | 
			
		||||
// Desc: Release Direct3D Textures (free-ing)
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
HRESULT ReleaseTextures() {
 | 
			
		||||
  // unregister the Cuda resources
 | 
			
		||||
  cudaGraphicsUnregisterResource(g_texture_2d.cudaResource);
 | 
			
		||||
  getLastCudaError("cudaGraphicsUnregisterResource (g_texture_2d) failed");
 | 
			
		||||
  cudaFree(g_texture_2d.cudaLinearMemory);
 | 
			
		||||
  getLastCudaError("cudaFree (g_texture_2d) failed");
 | 
			
		||||
 | 
			
		||||
  cudaGraphicsUnregisterResource(g_texture_cube.cudaResource);
 | 
			
		||||
  getLastCudaError("cudaGraphicsUnregisterResource (g_texture_cube) failed");
 | 
			
		||||
  cudaFree(g_texture_cube.cudaLinearMemory);
 | 
			
		||||
  getLastCudaError("cudaFree (g_texture_2d) failed");
 | 
			
		||||
 | 
			
		||||
  cudaGraphicsUnregisterResource(g_texture_vol.cudaResource);
 | 
			
		||||
  getLastCudaError("cudaGraphicsUnregisterResource (g_texture_vol) failed");
 | 
			
		||||
  cudaFree(g_texture_vol.cudaLinearMemory);
 | 
			
		||||
  getLastCudaError("cudaFree (g_texture_vol) failed");
 | 
			
		||||
 | 
			
		||||
  //
 | 
			
		||||
  // clean up Direct3D
 | 
			
		||||
  //
 | 
			
		||||
  {
 | 
			
		||||
    // release the resources we created
 | 
			
		||||
    g_texture_2d.pTexture->Release();
 | 
			
		||||
    g_texture_cube.pTexture->Release();
 | 
			
		||||
    g_texture_vol.pTexture->Release();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  return S_OK;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
//! Run the Cuda part of the computation
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
void RunKernels() {
 | 
			
		||||
  static float t = 0.0f;
 | 
			
		||||
 | 
			
		||||
  // populate the 2d texture
 | 
			
		||||
  {
 | 
			
		||||
    cudaArray *cuArray;
 | 
			
		||||
    cudaGraphicsSubResourceGetMappedArray(&cuArray, g_texture_2d.cudaResource,
 | 
			
		||||
                                          0, 0);
 | 
			
		||||
    getLastCudaError(
 | 
			
		||||
        "cudaGraphicsSubResourceGetMappedArray (cuda_texture_2d) failed");
 | 
			
		||||
 | 
			
		||||
    // kick off the kernel and send the staging buffer cudaLinearMemory as an
 | 
			
		||||
    // argument to allow the kernel to write to it
 | 
			
		||||
    cuda_texture_2d(g_texture_2d.cudaLinearMemory, g_texture_2d.width,
 | 
			
		||||
                    g_texture_2d.height, g_texture_2d.pitch, t);
 | 
			
		||||
    getLastCudaError("cuda_texture_2d failed");
 | 
			
		||||
 | 
			
		||||
    // then we want to copy cudaLinearMemory to the D3D texture, via its mapped
 | 
			
		||||
    // form : cudaArray
 | 
			
		||||
    cudaMemcpy2DToArray(
 | 
			
		||||
        cuArray,                                            // dst array
 | 
			
		||||
        0, 0,                                               // offset
 | 
			
		||||
        g_texture_2d.cudaLinearMemory, g_texture_2d.pitch,  // src
 | 
			
		||||
        g_texture_2d.width * 4 * sizeof(float), g_texture_2d.height,  // extent
 | 
			
		||||
        cudaMemcpyDeviceToDevice);                                    // kind
 | 
			
		||||
    getLastCudaError("cudaMemcpy2DToArray failed");
 | 
			
		||||
  }
 | 
			
		||||
  // populate the volume texture
 | 
			
		||||
  {
 | 
			
		||||
    size_t pitchSlice = g_texture_vol.pitch * g_texture_vol.height;
 | 
			
		||||
    cudaArray *cuArray;
 | 
			
		||||
    cudaGraphicsSubResourceGetMappedArray(&cuArray, g_texture_vol.cudaResource,
 | 
			
		||||
                                          0, 0);
 | 
			
		||||
    getLastCudaError(
 | 
			
		||||
        "cudaGraphicsSubResourceGetMappedArray (cuda_texture_3d) failed");
 | 
			
		||||
 | 
			
		||||
    // kick off the kernel and send the staging buffer cudaLinearMemory as an
 | 
			
		||||
    // argument to allow the kernel to write to it
 | 
			
		||||
    cuda_texture_volume(g_texture_vol.cudaLinearMemory, g_texture_vol.width,
 | 
			
		||||
                        g_texture_vol.height, g_texture_vol.depth,
 | 
			
		||||
                        g_texture_vol.pitch, pitchSlice, t);
 | 
			
		||||
    getLastCudaError("cuda_texture_3d failed");
 | 
			
		||||
 | 
			
		||||
    // then we want to copy cudaLinearMemory to the D3D texture, via its mapped
 | 
			
		||||
    // form : cudaArray
 | 
			
		||||
    struct cudaMemcpy3DParms memcpyParams = {0};
 | 
			
		||||
    memcpyParams.dstArray = cuArray;
 | 
			
		||||
    memcpyParams.srcPtr.ptr = g_texture_vol.cudaLinearMemory;
 | 
			
		||||
    memcpyParams.srcPtr.pitch = g_texture_vol.pitch;
 | 
			
		||||
    memcpyParams.srcPtr.xsize = g_texture_vol.width;
 | 
			
		||||
    memcpyParams.srcPtr.ysize = g_texture_vol.height;
 | 
			
		||||
    memcpyParams.extent.width = g_texture_vol.width;
 | 
			
		||||
    memcpyParams.extent.height = g_texture_vol.height;
 | 
			
		||||
    memcpyParams.extent.depth = g_texture_vol.depth;
 | 
			
		||||
    memcpyParams.kind = cudaMemcpyDeviceToDevice;
 | 
			
		||||
    cudaMemcpy3D(&memcpyParams);
 | 
			
		||||
    getLastCudaError("cudaMemcpy3D failed");
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // populate the faces of the cube map
 | 
			
		||||
  for (int face = 0; face < 6; ++face) {
 | 
			
		||||
    cudaArray *cuArray;
 | 
			
		||||
    cudaGraphicsSubResourceGetMappedArray(&cuArray, g_texture_cube.cudaResource,
 | 
			
		||||
                                          face, 0);
 | 
			
		||||
    getLastCudaError(
 | 
			
		||||
        "cudaGraphicsSubResourceGetMappedArray (cuda_texture_cube) failed");
 | 
			
		||||
 | 
			
		||||
    // kick off the kernel and send the staging buffer cudaLinearMemory as an
 | 
			
		||||
    // argument to allow the kernel to write to it
 | 
			
		||||
    cuda_texture_cube(g_texture_cube.cudaLinearMemory, g_texture_cube.size,
 | 
			
		||||
                      g_texture_cube.size, g_texture_cube.pitch, face, t);
 | 
			
		||||
    getLastCudaError("cuda_texture_cube failed");
 | 
			
		||||
 | 
			
		||||
    // then we want to copy cudaLinearMemory to the D3D texture, via its mapped
 | 
			
		||||
    // form : cudaArray
 | 
			
		||||
    cudaMemcpy2DToArray(cuArray,  // dst array
 | 
			
		||||
                        0, 0,     // offset
 | 
			
		||||
                        g_texture_cube.cudaLinearMemory,
 | 
			
		||||
                        g_texture_cube.pitch,                          // src
 | 
			
		||||
                        g_texture_cube.size * 4, g_texture_cube.size,  // extent
 | 
			
		||||
                        cudaMemcpyDeviceToDevice);                     // kind
 | 
			
		||||
    getLastCudaError("cudaMemcpy2DToArray failed");
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  t += 0.1f;
 | 
			
		||||
}
 | 
			
		||||
/*{
 | 
			
		||||
    static float t = 0.0f;
 | 
			
		||||
 | 
			
		||||
    // populate the 2d texture
 | 
			
		||||
    {
 | 
			
		||||
        void* pData;
 | 
			
		||||
        size_t pitch;
 | 
			
		||||
        checkCudaErrorsNoSync ( cudaD3D9ResourceGetMappedPointer(&pData,
 | 
			
		||||
g_texture_2d.pTexture, 0, 0) );
 | 
			
		||||
        checkCudaErrorsNoSync ( cudaD3D9ResourceGetMappedPitch(&pitch, NULL,
 | 
			
		||||
g_texture_2d.pTexture, 0, 0) );
 | 
			
		||||
        cuda_texture_2d(pData, g_texture_2d.width, g_texture_2d.height, pitch,
 | 
			
		||||
t);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // populate the faces of the cube map
 | 
			
		||||
    for (int face = 0; face < 6; ++face)
 | 
			
		||||
    {
 | 
			
		||||
        void* pData;
 | 
			
		||||
        size_t pitch;
 | 
			
		||||
        checkCudaErrorsNoSync ( cudaD3D9ResourceGetMappedPointer(&pData,
 | 
			
		||||
g_texture_cube.pTexture, face, 0) );
 | 
			
		||||
        checkCudaErrorsNoSync ( cudaD3D9ResourceGetMappedPitch(&pitch, NULL,
 | 
			
		||||
g_texture_cube.pTexture, face, 0) );
 | 
			
		||||
        cuda_texture_cube(pData, g_texture_cube.size, g_texture_cube.size,
 | 
			
		||||
pitch, face, t);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // populate the volume texture
 | 
			
		||||
    {
 | 
			
		||||
        void* pData;
 | 
			
		||||
        size_t pitch;
 | 
			
		||||
        size_t pitchSlice;
 | 
			
		||||
        checkCudaErrorsNoSync ( cudaD3D9ResourceGetMappedPointer(&pData,
 | 
			
		||||
g_texture_vol.pTexture, 0, 0) );
 | 
			
		||||
        checkCudaErrorsNoSync ( cudaD3D9ResourceGetMappedPitch(&pitch,
 | 
			
		||||
&pitchSlice, g_texture_vol.pTexture, 0, 0) );
 | 
			
		||||
        cuda_texture_volume(pData, g_texture_vol.width, g_texture_vol.height,
 | 
			
		||||
g_texture_vol.depth, pitch, pitchSlice);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    t += 0.1f;
 | 
			
		||||
}*/
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
//! RestoreContextResources
 | 
			
		||||
//    - this function restores all of the CUDA/D3D resources and contexts
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
HRESULT RestoreContextResources() {
 | 
			
		||||
  // Reinitialize D3D9 resources, CUDA resources/contexts
 | 
			
		||||
  InitCUDA();
 | 
			
		||||
  InitTextures();
 | 
			
		||||
  RegisterD3D9ResourceWithCUDA();
 | 
			
		||||
 | 
			
		||||
  return S_OK;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
//! DeviceLostHandler
 | 
			
		||||
//    - this function handles reseting and initialization of the D3D device
 | 
			
		||||
//      in the event this Device gets Lost
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
HRESULT DeviceLostHandler() {
 | 
			
		||||
  HRESULT hr = S_OK;
 | 
			
		||||
 | 
			
		||||
  fprintf(stderr, "-> Starting DeviceLostHandler() \n");
 | 
			
		||||
 | 
			
		||||
  // test the cooperative level to see if it's okay
 | 
			
		||||
  // to render
 | 
			
		||||
  if (FAILED(hr = g_pD3DDevice->TestCooperativeLevel())) {
 | 
			
		||||
    fprintf(stderr,
 | 
			
		||||
            "TestCooperativeLevel = %08x failed, will attempt to reset\n", hr);
 | 
			
		||||
 | 
			
		||||
    // if the device was truly lost, (i.e., a fullscreen device just lost
 | 
			
		||||
    // focus), wait
 | 
			
		||||
    // until we g_et it back
 | 
			
		||||
 | 
			
		||||
    if (hr == D3DERR_DEVICELOST) {
 | 
			
		||||
      fprintf(stderr,
 | 
			
		||||
              "TestCooperativeLevel = %08x DeviceLost, will retry next call\n",
 | 
			
		||||
              hr);
 | 
			
		||||
      return S_OK;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // eventually, we will g_et this return value,
 | 
			
		||||
    // indicating that we can now reset the device
 | 
			
		||||
    if (hr == D3DERR_DEVICENOTRESET) {
 | 
			
		||||
      fprintf(stderr,
 | 
			
		||||
              "TestCooperativeLevel = %08x will try to RESET the device\n", hr);
 | 
			
		||||
      // if we are windowed, read the desktop mode and use the same format for
 | 
			
		||||
      // the back buffer; this effectively turns off color conversion
 | 
			
		||||
 | 
			
		||||
      if (g_bWindowed) {
 | 
			
		||||
        g_pD3D->GetAdapterDisplayModeEx(g_iAdapter, &g_d3ddm, NULL);
 | 
			
		||||
        g_d3dpp.BackBufferFormat = g_d3ddm.Format;
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      // now try to reset the device
 | 
			
		||||
      if (FAILED(hr = g_pD3DDevice->Reset(&g_d3dpp))) {
 | 
			
		||||
        fprintf(stderr, "TestCooperativeLevel = %08x RESET device FAILED\n",
 | 
			
		||||
                hr);
 | 
			
		||||
        return hr;
 | 
			
		||||
      } else {
 | 
			
		||||
        fprintf(stderr, "TestCooperativeLevel = %08x RESET device SUCCESS!\n",
 | 
			
		||||
                hr);
 | 
			
		||||
 | 
			
		||||
        // This is a common function we use to restore all hardware
 | 
			
		||||
        // resources/state
 | 
			
		||||
        RestoreContextResources();
 | 
			
		||||
 | 
			
		||||
        fprintf(stderr, "TestCooperativeLevel = %08x INIT device SUCCESS!\n",
 | 
			
		||||
                hr);
 | 
			
		||||
 | 
			
		||||
        // we have acquired the device
 | 
			
		||||
        g_bDeviceLost = false;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  return hr;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
//! Draw the final result on the screen
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
HRESULT DrawScene() {
 | 
			
		||||
  HRESULT hr = S_OK;
 | 
			
		||||
 | 
			
		||||
  if (g_bDeviceLost) {
 | 
			
		||||
    if (FAILED(hr = DeviceLostHandler())) {
 | 
			
		||||
      fprintf(stderr, "DeviceLostHandler FAILED returned %08x\n", hr);
 | 
			
		||||
      return hr;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (!g_bDeviceLost) {
 | 
			
		||||
    //
 | 
			
		||||
    // we will use this index and vertex data throughout
 | 
			
		||||
    //
 | 
			
		||||
    unsigned int IB[6] = {
 | 
			
		||||
        0, 1, 2, 0, 2, 3,
 | 
			
		||||
    };
 | 
			
		||||
    struct VertexStruct {
 | 
			
		||||
      float position[3];
 | 
			
		||||
      float texture[3];
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    //
 | 
			
		||||
    // initialize the scene
 | 
			
		||||
    //
 | 
			
		||||
    D3DVIEWPORT9 viewport_window = {0, 0, 672, 192, 0, 1};
 | 
			
		||||
    g_pD3DDevice->SetViewport(&viewport_window);
 | 
			
		||||
    g_pD3DDevice->BeginScene();
 | 
			
		||||
    g_pD3DDevice->Clear(0, NULL, D3DCLEAR_TARGET, 0, 1.0f, 0);
 | 
			
		||||
    g_pD3DDevice->SetRenderState(D3DRS_CULLMODE, D3DCULL_NONE);
 | 
			
		||||
    g_pD3DDevice->SetRenderState(D3DRS_LIGHTING, FALSE);
 | 
			
		||||
    g_pD3DDevice->SetFVF(D3DFVF_XYZ | D3DFVF_TEX1 | D3DFVF_TEXCOORDSIZE3(0));
 | 
			
		||||
 | 
			
		||||
    //
 | 
			
		||||
    // draw the 2d texture
 | 
			
		||||
    //
 | 
			
		||||
    VertexStruct VB[4] = {
 | 
			
		||||
      {  {-1,-1,0,}, {0,0,0,},  },
 | 
			
		||||
      {  { 1,-1,0,}, {1,0,0,},  },
 | 
			
		||||
      {  { 1, 1,0,}, {1,1,0,},  },
 | 
			
		||||
      {  {-1, 1,0,}, {0,1,0,},  },
 | 
			
		||||
    };
 | 
			
		||||
    D3DVIEWPORT9 viewport = {32, 32, 256, 256, 0, 1};
 | 
			
		||||
    g_pD3DDevice->SetViewport(&viewport);
 | 
			
		||||
    g_pD3DDevice->SetTexture(0, g_texture_2d.pTexture);
 | 
			
		||||
    g_pD3DDevice->DrawIndexedPrimitiveUP(D3DPT_TRIANGLELIST, 0, 4, 2, IB,
 | 
			
		||||
                                         D3DFMT_INDEX32, VB,
 | 
			
		||||
                                         sizeof(VertexStruct));
 | 
			
		||||
 | 
			
		||||
    //
 | 
			
		||||
    // draw the Z-positive side of the cube texture
 | 
			
		||||
    //
 | 
			
		||||
    VertexStruct VB_Zpos[4] = {
 | 
			
		||||
      {  {-1,-1,0,}, {-1,-1, 0.5f,},  },
 | 
			
		||||
      {  { 1,-1,0,}, { 1,-1, 0.5f,},  },
 | 
			
		||||
      {  { 1, 1,0,}, { 1, 1, 0.5f,},  },
 | 
			
		||||
      {  {-1, 1,0,}, {-1, 1, 0.5f,},  },
 | 
			
		||||
    };
 | 
			
		||||
    viewport.Y += viewport.Height + 32;
 | 
			
		||||
    g_pD3DDevice->SetViewport(&viewport);
 | 
			
		||||
    g_pD3DDevice->SetTexture(0, g_texture_cube.pTexture);
 | 
			
		||||
    g_pD3DDevice->DrawIndexedPrimitiveUP(D3DPT_TRIANGLELIST, 0, 4, 2, IB,
 | 
			
		||||
                                         D3DFMT_INDEX32, VB_Zpos,
 | 
			
		||||
                                         sizeof(VertexStruct));
 | 
			
		||||
 | 
			
		||||
    //
 | 
			
		||||
    // draw the Z-negative side of the cube texture
 | 
			
		||||
    //
 | 
			
		||||
    VertexStruct VB_Zneg[4] = {
 | 
			
		||||
      {  {-1,-1,0,}, { 1,-1,-0.5f,},  },
 | 
			
		||||
      {  { 1,-1,0,}, {-1,-1,-0.5f,},  },
 | 
			
		||||
      {  { 1, 1,0,}, {-1, 1,-0.5f,},  },
 | 
			
		||||
      {  {-1, 1,0,}, { 1, 1,-0.5f,},  },
 | 
			
		||||
    };
 | 
			
		||||
    viewport.X += viewport.Width + 32;
 | 
			
		||||
    g_pD3DDevice->SetViewport(&viewport);
 | 
			
		||||
    g_pD3DDevice->SetTexture(0, g_texture_cube.pTexture);
 | 
			
		||||
    g_pD3DDevice->DrawIndexedPrimitiveUP(D3DPT_TRIANGLELIST, 0, 4, 2, IB,
 | 
			
		||||
                                         D3DFMT_INDEX32, VB_Zneg,
 | 
			
		||||
                                         sizeof(VertexStruct));
 | 
			
		||||
 | 
			
		||||
    //
 | 
			
		||||
    // draw a slice the volume texture
 | 
			
		||||
    //
 | 
			
		||||
    VertexStruct VB_Zslice[4] = {
 | 
			
		||||
      {  {-1,-1,0,}, {0,0,0,},  },
 | 
			
		||||
      {  { 1,-1,0,}, {1,0,0,},  },
 | 
			
		||||
      {  { 1, 1,0,}, {1,1,1,},  },
 | 
			
		||||
      {  {-1, 1,0,}, {0,1,1,},  },
 | 
			
		||||
    };
 | 
			
		||||
    viewport.Y -= viewport.Height + 32;
 | 
			
		||||
    g_pD3DDevice->SetViewport(&viewport);
 | 
			
		||||
    g_pD3DDevice->SetTexture(0, g_texture_vol.pTexture);
 | 
			
		||||
    g_pD3DDevice->DrawIndexedPrimitiveUP(D3DPT_TRIANGLELIST, 0, 4, 2, IB,
 | 
			
		||||
                                         D3DFMT_INDEX32, VB_Zslice,
 | 
			
		||||
                                         sizeof(VertexStruct));
 | 
			
		||||
 | 
			
		||||
    //
 | 
			
		||||
    // end the scene
 | 
			
		||||
    //
 | 
			
		||||
    g_pD3DDevice->EndScene();
 | 
			
		||||
    hr = g_pD3DDevice->Present(NULL, NULL, NULL, NULL);
 | 
			
		||||
 | 
			
		||||
    if (hr == D3DERR_DEVICELOST) {
 | 
			
		||||
      fprintf(stderr, "DrawScene Present = %08x detected D3D DeviceLost\n", hr);
 | 
			
		||||
      g_bDeviceLost = true;
 | 
			
		||||
 | 
			
		||||
      ReleaseTextures();
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  return hr;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
// Name: Cleanup()
 | 
			
		||||
// Desc: Releases all previously initialized objects
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
void Cleanup() {
 | 
			
		||||
  ReleaseTextures();
 | 
			
		||||
 | 
			
		||||
  {
 | 
			
		||||
    // destroy the D3D device
 | 
			
		||||
    g_pD3DDevice->Release();
 | 
			
		||||
    g_pD3D->Release();
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
// Name: RunCUDA()
 | 
			
		||||
// Desc: Launches the CUDA kernels to fill in the texture data
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
void RunCUDA() {
 | 
			
		||||
  //
 | 
			
		||||
  // map the resources we've registered so we can access them in Cuda
 | 
			
		||||
  // - it is most efficient to map and unmap all resources in a single call,
 | 
			
		||||
  //   and to have the map/unmap calls be the boundary between using the GPU
 | 
			
		||||
  //   for Direct3D and Cuda
 | 
			
		||||
  //
 | 
			
		||||
 | 
			
		||||
  if (!g_bDeviceLost) {
 | 
			
		||||
    cudaStream_t stream = 0;
 | 
			
		||||
    const int nbResources = 3;
 | 
			
		||||
    cudaGraphicsResource *ppResources[nbResources] = {
 | 
			
		||||
        g_texture_2d.cudaResource, g_texture_vol.cudaResource,
 | 
			
		||||
        g_texture_cube.cudaResource,
 | 
			
		||||
    };
 | 
			
		||||
    cudaGraphicsMapResources(nbResources, ppResources, stream);
 | 
			
		||||
    getLastCudaError("cudaGraphicsMapResources(3) failed");
 | 
			
		||||
 | 
			
		||||
    //
 | 
			
		||||
    // run kernels which will populate the contents of those textures
 | 
			
		||||
    //
 | 
			
		||||
    RunKernels();
 | 
			
		||||
 | 
			
		||||
    //
 | 
			
		||||
    // unmap the resources
 | 
			
		||||
    //
 | 
			
		||||
    cudaGraphicsUnmapResources(nbResources, ppResources, stream);
 | 
			
		||||
    getLastCudaError("cudaGraphicsUnmapResources(3) failed");
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
// Name: MsgProc()
 | 
			
		||||
// Desc: The window's message handler
 | 
			
		||||
//-----------------------------------------------------------------------------
 | 
			
		||||
static LRESULT WINAPI MsgProc(HWND hWnd, UINT msg, WPARAM wParam,
 | 
			
		||||
                              LPARAM lParam) {
 | 
			
		||||
  switch (msg) {
 | 
			
		||||
    case WM_KEYDOWN:
 | 
			
		||||
      if (wParam == VK_ESCAPE) {
 | 
			
		||||
        g_bDone = true;
 | 
			
		||||
        Cleanup();
 | 
			
		||||
        PostQuitMessage(0);
 | 
			
		||||
        return 0;
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      break;
 | 
			
		||||
 | 
			
		||||
    case WM_DESTROY:
 | 
			
		||||
      g_bDone = true;
 | 
			
		||||
      Cleanup();
 | 
			
		||||
      PostQuitMessage(0);
 | 
			
		||||
      return 0;
 | 
			
		||||
 | 
			
		||||
    case WM_PAINT:
 | 
			
		||||
      ValidateRect(hWnd, NULL);
 | 
			
		||||
      return 0;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  return DefWindowProc(hWnd, msg, wParam, lParam);
 | 
			
		||||
}
 | 
			
		||||
@ -1,78 +0,0 @@
 | 
			
		||||
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
 | 
			
		||||
 *
 | 
			
		||||
 * Redistribution and use in source and binary forms, with or without
 | 
			
		||||
 * modification, are permitted provided that the following conditions
 | 
			
		||||
 * are met:
 | 
			
		||||
 *  * Redistributions of source code must retain the above copyright
 | 
			
		||||
 *    notice, this list of conditions and the following disclaimer.
 | 
			
		||||
 *  * Redistributions in binary form must reproduce the above copyright
 | 
			
		||||
 *    notice, this list of conditions and the following disclaimer in the
 | 
			
		||||
 *    documentation and/or other materials provided with the distribution.
 | 
			
		||||
 *  * Neither the name of NVIDIA CORPORATION nor the names of its
 | 
			
		||||
 *    contributors may be used to endorse or promote products derived
 | 
			
		||||
 *    from this software without specific prior written permission.
 | 
			
		||||
 *
 | 
			
		||||
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 | 
			
		||||
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 | 
			
		||||
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 | 
			
		||||
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 | 
			
		||||
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 | 
			
		||||
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 | 
			
		||||
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 | 
			
		||||
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 | 
			
		||||
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 | 
			
		||||
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 | 
			
		||||
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
#include <stdio.h>
 | 
			
		||||
#include <stdlib.h>
 | 
			
		||||
#include <string.h>
 | 
			
		||||
 | 
			
		||||
#define PI 3.1415926536f
 | 
			
		||||
 | 
			
		||||
/*
 * Paint a 2D texture with a moving red/green hatch pattern on a
 * strobing blue background.  Note that this kernel reads from and
 * writes to the texture (red and green are blended with the previous
 * contents), hence why this texture was not mapped as WriteDiscard.
 *
 * Launch layout: 2D grid of 2D blocks, one thread per pixel; threads that
 * fall outside width x height return immediately.
 *
 *   surface  base address of the pixel rows; each pixel is 4 floats
 *   width    texture width in pixels
 *   height   texture height in pixels
 *   pitch    distance between consecutive rows, in bytes
 *   t        animation time
 *
 * All arithmetic is kept in single precision: the blend factors are float
 * literals so the red/green update is not promoted to double.
 */
__global__ void cuda_kernel_texture_2d(unsigned char *surface, int width,
                                       int height, size_t pitch, float t) {
  int x = blockIdx.x * blockDim.x + threadIdx.x;
  int y = blockIdx.y * blockDim.y + threadIdx.y;

  // in the case where, due to quantization into grids, we have
  // more threads than pixels, skip the threads which don't
  // correspond to valid pixels
  if (x >= width || y >= height) return;

  // get a pointer to the pixel at (x,y): rows are 'pitch' bytes apart
  float *pixel = (float *)(surface + y * pitch) + 4 * x;

  // populate it
  float value_x = 0.5f + 0.5f * cosf(t + 10.0f * ((2.0f * x) / width - 1.0f));
  float value_y = 0.5f + 0.5f * cosf(t + 10.0f * ((2.0f * y) / height - 1.0f));
  pixel[0] = 0.5f * pixel[0] + 0.5f * powf(value_x, 3.0f);  // red
  pixel[1] = 0.5f * pixel[1] + 0.5f * powf(value_y, 3.0f);  // green
  pixel[2] = 0.5f + 0.5f * cosf(t);                         // blue
  pixel[3] = 1.0f;                                          // alpha
}
 | 
			
		||||
 | 
			
		||||
// Host entry point: launches cuda_kernel_texture_2d over a width x height
// pixel area using 16x16 thread blocks, and prints the numeric error code to
// stdout if the launch is reported as failed.
extern "C" void cuda_texture_2d(void *surface, int width, int height,
                                size_t pitch, float t) {
  // block dimensions are fixed to be 256 threads
  const dim3 block(16, 16);
  // round up so that partially covered tiles still get a block
  const dim3 grid((width + block.x - 1) / block.x,
                  (height + block.y - 1) / block.y);

  cuda_kernel_texture_2d<<<grid, block>>>((unsigned char *)surface, width,
                                          height, pitch, t);

  const cudaError_t launchStatus = cudaGetLastError();

  if (launchStatus != cudaSuccess) {
    printf("cuda_kernel_texture_2d() failed to launch error = %d\n",
           launchStatus);
  }
}
 | 
			
		||||
@ -1,89 +0,0 @@
 | 
			
		||||
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
 | 
			
		||||
 *
 | 
			
		||||
 * Redistribution and use in source and binary forms, with or without
 | 
			
		||||
 * modification, are permitted provided that the following conditions
 | 
			
		||||
 * are met:
 | 
			
		||||
 *  * Redistributions of source code must retain the above copyright
 | 
			
		||||
 *    notice, this list of conditions and the following disclaimer.
 | 
			
		||||
 *  * Redistributions in binary form must reproduce the above copyright
 | 
			
		||||
 *    notice, this list of conditions and the following disclaimer in the
 | 
			
		||||
 *    documentation and/or other materials provided with the distribution.
 | 
			
		||||
 *  * Neither the name of NVIDIA CORPORATION nor the names of its
 | 
			
		||||
 *    contributors may be used to endorse or promote products derived
 | 
			
		||||
 *    from this software without specific prior written permission.
 | 
			
		||||
 *
 | 
			
		||||
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 | 
			
		||||
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 | 
			
		||||
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 | 
			
		||||
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 | 
			
		||||
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 | 
			
		||||
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 | 
			
		||||
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 | 
			
		||||
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 | 
			
		||||
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 | 
			
		||||
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 | 
			
		||||
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
#include <stdio.h>
 | 
			
		||||
#include <stdlib.h>
 | 
			
		||||
#include <string.h>
 | 
			
		||||
 | 
			
		||||
#define PI 3.1415926536f
 | 
			
		||||
 | 
			
		||||
/*
 * Paint a 2D surface with a moving bulls-eye pattern.  The "face" parameter
 * selects between 6 different colors to use, so that each face of a cube map
 * gets its own color.
 *
 * Launch layout: 2D grid of 2D blocks, one thread per pixel; threads outside
 * width x height return immediately.  Each pixel is 4 bytes, rows are 'pitch'
 * bytes apart.
 */
__global__ void cuda_kernel_texture_cube(char *surface, int width, int height,
                                         size_t pitch, int face, float t) {
  const int col = blockIdx.x * blockDim.x + threadIdx.x;
  const int row = blockIdx.y * blockDim.y + threadIdx.y;

  // the grid is rounded up to whole blocks, so some threads have no pixel
  if (col >= width || row >= height) return;

  // address of this thread's pixel
  unsigned char *texel = (unsigned char *)(surface + row * pitch) + 4 * col;

  // distance from the face centre drives the phase of the rings
  float theta_x = (2.0f * col) / width - 1.0f;
  float theta_y = (2.0f * row) / height - 1.0f;
  float theta = 2.0f * PI * sqrt(theta_x * theta_x + theta_y * theta_y);
  unsigned char value = 255 * (0.6f + 0.4f * cos(theta + t));

  // odd faces: a single lit channel on black;
  // even faces: a single dark channel on a lit background
  const bool oddFace = (face % 2) != 0;
  const unsigned char background = oddFace ? (unsigned char)0 : value;
  const unsigned char highlight = oddFace ? value : (unsigned char)0;

  texel[3] = 255;         // alpha
  texel[2] = background;  // red
  texel[1] = background;  // green
  texel[0] = background;  // blue
  texel[face / 2] = highlight;
}
 | 
			
		||||
 | 
			
		||||
// Host entry point: launches cuda_kernel_texture_cube for one cube-map face
// over a width x height pixel area using 16x16 thread blocks, and prints the
// numeric error code to stdout if the launch is reported as failed.
extern "C" void cuda_texture_cube(void *surface, int width, int height,
                                  size_t pitch, int face, float t) {
  // block dimensions are fixed to be 256 threads
  const dim3 block(16, 16);
  // round up so that partially covered tiles still get a block
  const dim3 grid((width + block.x - 1) / block.x,
                  (height + block.y - 1) / block.y);

  cuda_kernel_texture_cube<<<grid, block>>>((char *)surface, width, height,
                                            pitch, face, t);

  const cudaError_t launchStatus = cudaGetLastError();

  if (launchStatus != cudaSuccess) {
    printf("cuda_kernel_texture_cube() failed to launch error = %d\n",
           launchStatus);
  }
}
 | 
			
		||||
@ -1,75 +0,0 @@
 | 
			
		||||
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
 | 
			
		||||
 *
 | 
			
		||||
 * Redistribution and use in source and binary forms, with or without
 | 
			
		||||
 * modification, are permitted provided that the following conditions
 | 
			
		||||
 * are met:
 | 
			
		||||
 *  * Redistributions of source code must retain the above copyright
 | 
			
		||||
 *    notice, this list of conditions and the following disclaimer.
 | 
			
		||||
 *  * Redistributions in binary form must reproduce the above copyright
 | 
			
		||||
 *    notice, this list of conditions and the following disclaimer in the
 | 
			
		||||
 *    documentation and/or other materials provided with the distribution.
 | 
			
		||||
 *  * Neither the name of NVIDIA CORPORATION nor the names of its
 | 
			
		||||
 *    contributors may be used to endorse or promote products derived
 | 
			
		||||
 *    from this software without specific prior written permission.
 | 
			
		||||
 *
 | 
			
		||||
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 | 
			
		||||
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 | 
			
		||||
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 | 
			
		||||
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 | 
			
		||||
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 | 
			
		||||
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 | 
			
		||||
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 | 
			
		||||
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 | 
			
		||||
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 | 
			
		||||
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 | 
			
		||||
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
#include <stdio.h>
 | 
			
		||||
#include <stdlib.h>
 | 
			
		||||
#include <string.h>
 | 
			
		||||
 | 
			
		||||
/*
 * Paint a 3D texture with a gradient in X (blue) and Z (green), and have every
 * other Z slice have full red.
 *
 * Launch layout: a 2D grid of 2D blocks covering at least width x height
 * threads; each thread owns one (x, y) column and walks all `depth` slices.
 * No shared memory is used.
 *
 * Memory layout (as addressed by this kernel): 4 bytes per pixel written in
 * the order blue, green, red, alpha; `pitch` is the byte distance between
 * consecutive rows and `pitchSlice` the byte distance between consecutive
 * Z slices.
 *
 * surface    - base address of the volume (assumed to be device-accessible
 *              memory of at least depth * pitchSlice bytes - confirm at caller)
 * width      - number of pixels per row
 * height     - number of rows per slice
 * depth      - number of Z slices
 * pitch      - row stride in bytes
 * pitchSlice - slice stride in bytes
 */
__global__ void cuda_kernel_texture_volume(unsigned char *surface, int width,
                                           int height, int depth, size_t pitch,
                                           size_t pitchSlice) {
  const int x = blockIdx.x * blockDim.x + threadIdx.x;
  const int y = blockIdx.y * blockDim.y + threadIdx.y;

  // The grid is rounded up to whole blocks, so some threads fall outside the
  // image; those have nothing to paint.
  if (x >= width || y >= height) return;

  // A one-pixel-wide or one-slice-deep volume would make (extent - 1) zero.
  // Clamp the divisor to 1 so the gradient degenerates to 0 instead of
  // performing an integer division by zero.
  const int xSpan = (width > 1) ? (width - 1) : 1;
  const int zSpan = (depth > 1) ? (depth - 1) : 1;

  // Values that do not depend on z are computed once, outside the loop.
  const unsigned char blue = static_cast<unsigned char>(255 * x / xSpan);
  unsigned char *const column = surface + y * pitch + 4 * x;

  // Walk across the Z slices of this texture. Note that consecutive writes of
  // one thread are pitchSlice bytes apart, which is far from optimal access.
  for (int z = 0; z < depth; ++z) {
    unsigned char *pixel = column + z * pitchSlice;
    pixel[0] = blue;                                          // blue
    pixel[1] = static_cast<unsigned char>(255 * z / zSpan);   // green
    pixel[2] = static_cast<unsigned char>(255 * (z % 2));     // red
    pixel[3] = 255;                                           // alpha
  }
}
 | 
			
		||||
 | 
			
		||||
/*
 * Host-side launcher for cuda_kernel_texture_volume.
 *
 * Uses fixed 16x16 thread blocks and a 2D grid rounded up so that every
 * (x, y) pixel of a slice is covered by one thread. The launch goes to the
 * default stream and is not synchronized here; only launch errors reported
 * by cudaGetLastError() are printed.
 *
 * surface    - base address of the volume, forwarded to the kernel
 * width      - number of pixels per row
 * height     - number of rows per slice
 * depth      - number of Z slices
 * pitch      - row stride in bytes
 * pitchSlice - slice stride in bytes
 * t          - unused; kept so the exported C signature stays unchanged
 */
extern "C" void cuda_texture_volume(void *surface, int width, int height,
                                    int depth, size_t pitch, size_t pitchSlice,
                                    float t) {
  (void)t;  // not used by this sample

  // Nothing to paint for an empty volume. Without this guard a zero extent
  // yields a zero-sized grid (invalid launch configuration), and a negative
  // extent wraps around in the unsigned ceil-division below.
  if (width <= 0 || height <= 0 || depth <= 0) {
    return;
  }

  const dim3 Db(16, 16);  // block dimensions are fixed to be 256 threads
  const dim3 Dg((width + Db.x - 1) / Db.x, (height + Db.y - 1) / Db.y);

  cuda_kernel_texture_volume<<<Dg, Db>>>(static_cast<unsigned char *>(surface),
                                         width, height, depth, pitch,
                                         pitchSlice);

  // Kernel launches do not return a status; fetch (and clear) it explicitly.
  const cudaError_t error = cudaGetLastError();

  if (error != cudaSuccess) {
    printf("cuda_kernel_texture_volume() failed to launch error = %d\n", error);
  }
}
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user