mirror of
https://github.com/NVIDIA/cuda-samples.git
synced 2024-11-24 20:39:17 +08:00
648 lines
24 KiB
C
648 lines
24 KiB
C
/*
|
|
* Copyright 1993-2014 NVIDIA Corporation. All rights reserved.
|
|
*
|
|
* NVIDIA Corporation and its licensors retain all intellectual property and
|
|
* proprietary rights in and to this software and related documentation.
|
|
* Any use, reproduction, disclosure, or distribution of this software
|
|
* and related documentation without an express license agreement from
|
|
* NVIDIA Corporation is strictly prohibited.
|
|
*
|
|
* Please refer to the applicable NVIDIA end user license agreement (EULA)
|
|
* associated with this source code for terms and conditions that govern
|
|
* your use of this NVIDIA software.
|
|
*
|
|
*/
|
|
|
|
// With these flags defined, this source file will dynamically
|
|
// load the corresponding functions. Disabled by default.
|
|
//#define CUDA_INIT_D3D9
|
|
//#define CUDA_INIT_D3D10
|
|
//#define CUDA_INIT_D3D11
|
|
//#define CUDA_INIT_OPENGL
|
|
|
|
#include <stdio.h>
|
|
#include "cuda_drvapi_dynlink.h"
|
|
|
|
tcuInit *_cuInit;
|
|
tcuDriverGetVersion *cuDriverGetVersion;
|
|
tcuDeviceGet *cuDeviceGet;
|
|
tcuDeviceGetCount *cuDeviceGetCount;
|
|
tcuDeviceGetName *cuDeviceGetName;
|
|
tcuDeviceComputeCapability *cuDeviceComputeCapability;
|
|
tcuDeviceTotalMem *cuDeviceTotalMem;
|
|
tcuDeviceGetProperties *cuDeviceGetProperties;
|
|
tcuDeviceGetAttribute *cuDeviceGetAttribute;
|
|
tcuGetErrorString *cuGetErrorString;
|
|
tcuCtxCreate *cuCtxCreate;
|
|
tcuCtxDestroy *cuCtxDestroy;
|
|
tcuCtxAttach *cuCtxAttach;
|
|
tcuCtxDetach *cuCtxDetach;
|
|
tcuCtxPushCurrent *cuCtxPushCurrent;
|
|
tcuCtxPopCurrent *cuCtxPopCurrent;
|
|
tcuCtxGetCurrent *cuCtxGetCurrent;
|
|
tcuCtxSetCurrent *cuCtxSetCurrent;
|
|
tcuCtxGetDevice *cuCtxGetDevice;
|
|
tcuCtxSynchronize *cuCtxSynchronize;
|
|
tcuModuleLoad *cuModuleLoad;
|
|
tcuModuleLoadData *cuModuleLoadData;
|
|
tcuModuleLoadDataEx *cuModuleLoadDataEx;
|
|
tcuModuleLoadFatBinary *cuModuleLoadFatBinary;
|
|
tcuModuleUnload *cuModuleUnload;
|
|
tcuModuleGetFunction *cuModuleGetFunction;
|
|
tcuModuleGetGlobal *cuModuleGetGlobal;
|
|
tcuModuleGetTexRef *cuModuleGetTexRef;
|
|
tcuModuleGetSurfRef *cuModuleGetSurfRef;
|
|
tcuMemGetInfo *cuMemGetInfo;
|
|
tcuMemAlloc *cuMemAlloc;
|
|
tcuMemAllocPitch *cuMemAllocPitch;
|
|
tcuMemFree *cuMemFree;
|
|
tcuMemGetAddressRange *cuMemGetAddressRange;
|
|
tcuMemAllocHost *cuMemAllocHost;
|
|
tcuMemFreeHost *cuMemFreeHost;
|
|
tcuMemHostAlloc *cuMemHostAlloc;
|
|
tcuMemHostGetFlags *cuMemHostGetFlags;
|
|
|
|
tcuMemHostGetDevicePointer *cuMemHostGetDevicePointer;
|
|
tcuDeviceGetByPCIBusId *cuDeviceGetByPCIBusId;
|
|
tcuDeviceGetPCIBusId *cuDeviceGetPCIBusId;
|
|
tcuIpcGetEventHandle *cuIpcGetEventHandle;
|
|
tcuIpcOpenEventHandle *cuIpcOpenEventHandle;
|
|
tcuIpcGetMemHandle *cuIpcGetMemHandle;
|
|
tcuIpcOpenMemHandle *cuIpcOpenMemHandle;
|
|
tcuIpcCloseMemHandle *cuIpcCloseMemHandle;
|
|
|
|
tcuMemHostRegister *cuMemHostRegister;
|
|
tcuMemHostUnregister *cuMemHostUnregister;
|
|
tcuMemcpyHtoD *cuMemcpyHtoD;
|
|
tcuMemcpyDtoH *cuMemcpyDtoH;
|
|
tcuMemcpyDtoD *cuMemcpyDtoD;
|
|
tcuMemcpyDtoA *cuMemcpyDtoA;
|
|
tcuMemcpyAtoD *cuMemcpyAtoD;
|
|
tcuMemcpyHtoA *cuMemcpyHtoA;
|
|
tcuMemcpyAtoH *cuMemcpyAtoH;
|
|
tcuMemcpyAtoA *cuMemcpyAtoA;
|
|
tcuMemcpy2D *cuMemcpy2D;
|
|
tcuMemcpy2DUnaligned *cuMemcpy2DUnaligned;
|
|
tcuMemcpy3D *cuMemcpy3D;
|
|
tcuMemcpyHtoDAsync *cuMemcpyHtoDAsync;
|
|
tcuMemcpyDtoHAsync *cuMemcpyDtoHAsync;
|
|
tcuMemcpyDtoDAsync *cuMemcpyDtoDAsync;
|
|
tcuMemcpyHtoAAsync *cuMemcpyHtoAAsync;
|
|
tcuMemcpyAtoHAsync *cuMemcpyAtoHAsync;
|
|
tcuMemcpy2DAsync *cuMemcpy2DAsync;
|
|
tcuMemcpy3DAsync *cuMemcpy3DAsync;
|
|
tcuMemcpy *cuMemcpy;
|
|
tcuMemcpyPeer *cuMemcpyPeer;
|
|
tcuMemsetD8 *cuMemsetD8;
|
|
tcuMemsetD16 *cuMemsetD16;
|
|
tcuMemsetD32 *cuMemsetD32;
|
|
tcuMemsetD2D8 *cuMemsetD2D8;
|
|
tcuMemsetD2D16 *cuMemsetD2D16;
|
|
tcuMemsetD2D32 *cuMemsetD2D32;
|
|
tcuFuncSetBlockShape *cuFuncSetBlockShape;
|
|
tcuFuncSetSharedSize *cuFuncSetSharedSize;
|
|
tcuFuncGetAttribute *cuFuncGetAttribute;
|
|
tcuFuncSetCacheConfig *cuFuncSetCacheConfig;
|
|
tcuFuncSetSharedMemConfig *cuFuncSetSharedMemConfig;
|
|
tcuLaunchKernel *cuLaunchKernel;
|
|
tcuArrayCreate *cuArrayCreate;
|
|
tcuArrayGetDescriptor *cuArrayGetDescriptor;
|
|
tcuArrayDestroy *cuArrayDestroy;
|
|
tcuArray3DCreate *cuArray3DCreate;
|
|
tcuArray3DGetDescriptor *cuArray3DGetDescriptor;
|
|
tcuTexRefCreate *cuTexRefCreate;
|
|
tcuTexRefDestroy *cuTexRefDestroy;
|
|
tcuTexRefSetArray *cuTexRefSetArray;
|
|
tcuTexRefSetAddress *cuTexRefSetAddress;
|
|
tcuTexRefSetAddress2D *cuTexRefSetAddress2D;
|
|
tcuTexRefSetFormat *cuTexRefSetFormat;
|
|
tcuTexRefSetAddressMode *cuTexRefSetAddressMode;
|
|
tcuTexRefSetFilterMode *cuTexRefSetFilterMode;
|
|
tcuTexRefSetFlags *cuTexRefSetFlags;
|
|
tcuTexRefGetAddress *cuTexRefGetAddress;
|
|
tcuTexRefGetArray *cuTexRefGetArray;
|
|
tcuTexRefGetAddressMode *cuTexRefGetAddressMode;
|
|
tcuTexRefGetFilterMode *cuTexRefGetFilterMode;
|
|
tcuTexRefGetFormat *cuTexRefGetFormat;
|
|
tcuTexRefGetFlags *cuTexRefGetFlags;
|
|
tcuSurfRefSetArray *cuSurfRefSetArray;
|
|
tcuSurfRefGetArray *cuSurfRefGetArray;
|
|
tcuParamSetSize *cuParamSetSize;
|
|
tcuParamSeti *cuParamSeti;
|
|
tcuParamSetf *cuParamSetf;
|
|
tcuParamSetv *cuParamSetv;
|
|
tcuParamSetTexRef *cuParamSetTexRef;
|
|
tcuLaunch *cuLaunch;
|
|
tcuLaunchGrid *cuLaunchGrid;
|
|
tcuLaunchGridAsync *cuLaunchGridAsync;
|
|
tcuEventCreate *cuEventCreate;
|
|
tcuEventRecord *cuEventRecord;
|
|
tcuEventQuery *cuEventQuery;
|
|
tcuEventSynchronize *cuEventSynchronize;
|
|
tcuEventDestroy *cuEventDestroy;
|
|
tcuEventElapsedTime *cuEventElapsedTime;
|
|
tcuStreamCreate *cuStreamCreate;
|
|
tcuStreamWaitEvent *cuStreamWaitEvent;
|
|
tcuStreamAddCallback *cuStreamAddCallback;
|
|
tcuStreamQuery *cuStreamQuery;
|
|
tcuStreamSynchronize *cuStreamSynchronize;
|
|
tcuStreamDestroy *cuStreamDestroy;
|
|
tcuGraphicsUnregisterResource *cuGraphicsUnregisterResource;
|
|
tcuGraphicsSubResourceGetMappedArray *cuGraphicsSubResourceGetMappedArray;
|
|
tcuGraphicsResourceGetMappedPointer *cuGraphicsResourceGetMappedPointer;
|
|
tcuGraphicsResourceSetMapFlags *cuGraphicsResourceSetMapFlags;
|
|
tcuGraphicsMapResources *cuGraphicsMapResources;
|
|
tcuGraphicsUnmapResources *cuGraphicsUnmapResources;
|
|
tcuGetExportTable *cuGetExportTable;
|
|
tcuCtxSetLimit *cuCtxSetLimit;
|
|
tcuCtxGetLimit *cuCtxGetLimit;
|
|
tcuCtxGetCacheConfig *cuCtxGetCacheConfig;
|
|
tcuCtxSetCacheConfig *cuCtxSetCacheConfig;
|
|
tcuCtxGetSharedMemConfig *cuCtxGetSharedMemConfig;
|
|
tcuCtxSetSharedMemConfig *cuCtxSetSharedMemConfig;
|
|
tcuCtxGetApiVersion *cuCtxGetApiVersion;
|
|
|
|
tcuMipmappedArrayCreate *cuMipmappedArrayCreate;
|
|
tcuMipmappedArrayGetLevel *cuMipmappedArrayGetLevel;
|
|
tcuMipmappedArrayDestroy *cuMipmappedArrayDestroy;
|
|
|
|
tcuProfilerStop *cuProfilerStop;
|
|
|
|
/*
 * Optional graphics-interop entry points.  Each group is only compiled when
 * the matching CUDA_INIT_* flag is defined (all are disabled by default at
 * the top of this file).
 */
#if defined(CUDA_INIT_D3D9)
// D3D9/CUDA interop (CUDA 1.x compatible API). These functions
// are deprecated; please use the ones below
tcuD3D9Begin                  *cuD3D9Begin;
// NOTE(review): the variable below is spelled cuD3DEnd (no '9'), unlike its
// type tcuD3D9End and its siblings -- confirm against the header before
// renaming, since this is an externally visible symbol.
tcuD3D9End                    *cuD3DEnd;
tcuD3D9RegisterVertexBuffer   *cuD3D9RegisterVertexBuffer;
tcuD3D9MapVertexBuffer        *cuD3D9MapVertexBuffer;
tcuD3D9UnmapVertexBuffer      *cuD3D9UnmapVertexBuffer;
tcuD3D9UnregisterVertexBuffer *cuD3D9UnregisterVertexBuffer;

// D3D9/CUDA interop (CUDA 2.x compatible)
tcuD3D9GetDirect3DDevice            *cuD3D9GetDirect3DDevice;
tcuD3D9RegisterResource             *cuD3D9RegisterResource;
tcuD3D9UnregisterResource           *cuD3D9UnregisterResource;
tcuD3D9MapResources                 *cuD3D9MapResources;
tcuD3D9UnmapResources               *cuD3D9UnmapResources;
tcuD3D9ResourceSetMapFlags          *cuD3D9ResourceSetMapFlags;
tcuD3D9ResourceGetSurfaceDimensions *cuD3D9ResourceGetSurfaceDimensions;
tcuD3D9ResourceGetMappedArray       *cuD3D9ResourceGetMappedArray;
tcuD3D9ResourceGetMappedPointer     *cuD3D9ResourceGetMappedPointer;
tcuD3D9ResourceGetMappedSize        *cuD3D9ResourceGetMappedSize;
tcuD3D9ResourceGetMappedPitch       *cuD3D9ResourceGetMappedPitch;

// D3D9/CUDA interop (CUDA 2.0+)
tcuD3D9GetDevice                *cuD3D9GetDevice;
tcuD3D9CtxCreate                *cuD3D9CtxCreate;
tcuGraphicsD3D9RegisterResource *cuGraphicsD3D9RegisterResource;
#endif /* CUDA_INIT_D3D9 */

#if defined(CUDA_INIT_D3D10)
// D3D10/CUDA interop (CUDA 3.0+)
tcuD3D10GetDevice                *cuD3D10GetDevice;
tcuD3D10CtxCreate                *cuD3D10CtxCreate;
tcuGraphicsD3D10RegisterResource *cuGraphicsD3D10RegisterResource;
#endif /* CUDA_INIT_D3D10 */


#if defined(CUDA_INIT_D3D11)
// D3D11/CUDA interop (CUDA 3.0+)
tcuD3D11GetDevice                *cuD3D11GetDevice;
tcuD3D11CtxCreate                *cuD3D11CtxCreate;
tcuGraphicsD3D11RegisterResource *cuGraphicsD3D11RegisterResource;
#endif /* CUDA_INIT_D3D11 */

// GL/CUDA interop
#if defined(CUDA_INIT_OPENGL)
tcuGLCtxCreate              *cuGLCtxCreate;
tcuGraphicsGLRegisterBuffer *cuGraphicsGLRegisterBuffer;
tcuGraphicsGLRegisterImage  *cuGraphicsGLRegisterImage;
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
// WGL device lookup is only declared on Windows builds
tcuWGLGetDevice             *cuWGLGetDevice;
#endif
#endif /* CUDA_INIT_OPENGL */
|
|
|
|
// Turns its argument into a string literal. The GET_PROC_EX_V2/_V3 macros
// below use it to spell the "<name>_v2" / "<name>_v3" symbol they look up.
#define STRINGIFY(X) #X
|
|
|
|
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
|
|
#include <Windows.h>
|
|
|
|
#ifdef UNICODE
|
|
static LPCWSTR __CudaLibName = L"nvcuda.dll";
|
|
#else
|
|
static LPCSTR __CudaLibName = "nvcuda.dll";
|
|
#endif
|
|
|
|
typedef HMODULE CUDADRIVER;
|
|
|
|
static CUresult LOAD_LIBRARY(CUDADRIVER *pInstance)
|
|
{
|
|
*pInstance = LoadLibrary(__CudaLibName);
|
|
|
|
if (*pInstance == NULL)
|
|
{
|
|
printf("LoadLibrary \"%s\" failed!\n", __CudaLibName);
|
|
return CUDA_ERROR_UNKNOWN;
|
|
}
|
|
|
|
return CUDA_SUCCESS;
|
|
}
|
|
|
|
/*
 * Shared body of the GET_PROC_EX* macros (Windows flavour).
 *
 * Looks up `symbol` (a string literal) in the library handle held by the
 * local variable CudaDrvLib of the enclosing function and stores the result,
 * cast to `type *`, in `alias`.  When the symbol is missing and `required`
 * is non-zero, a diagnostic is printed and the ENCLOSING FUNCTION returns
 * CUDA_ERROR_UNKNOWN.
 *
 * The body is wrapped in do { } while (0) so that an invocation followed by
 * ';' is exactly one statement and stays correct under an unbraced if/else.
 */
#define GET_PROC_SYMBOL(type, symbol, alias, required)                        \
    do {                                                                      \
        alias = (type *)GetProcAddress(CudaDrvLib, symbol);                   \
        if (alias == NULL && (required)) {                                    \
            printf("Failed to find required function \"%s\" in %s\n",         \
                   symbol, __CudaLibName);                                    \
            return CUDA_ERROR_UNKNOWN;                                        \
        }                                                                     \
    } while (0)

/* Resolve the unversioned symbol "<name>" into `alias` (type t<name> *). */
#define GET_PROC_EX(name, alias, required)                                    \
    GET_PROC_SYMBOL(t##name, #name, alias, required)

/* Resolve the versioned symbol "<name>_v2" into `alias` (type t<name> *). */
#define GET_PROC_EX_V2(name, alias, required)                                 \
    GET_PROC_SYMBOL(t##name, STRINGIFY(name##_v2), alias, required)

/* Resolve the versioned symbol "<name>_v3" into `alias` (type t<name> *). */
#define GET_PROC_EX_V3(name, alias, required)                                 \
    GET_PROC_SYMBOL(t##name, STRINGIFY(name##_v3), alias, required)
|
|
|
|
#elif defined(__unix__) || defined (__QNX__) || defined(__APPLE__) || defined(__MACOSX)
|
|
|
|
#include <dlfcn.h>
|
|
|
|
/*
 * Location of the CUDA driver shared library for the current platform.
 * The string is handed to dlopen() and printed in diagnostics; it is never
 * modified, hence const.
 */
#if defined(__APPLE__) || defined(__MACOSX)
static const char __CudaLibName[] = "/usr/local/cuda/lib/libcuda.dylib";
#elif defined(__ANDROID__)
#if defined (__aarch64__)
static const char __CudaLibName[] = "/system/vendor/lib64/libcuda.so";
#elif defined(__arm__)
static const char __CudaLibName[] = "/system/vendor/lib/libcuda.so";
#else
/* Fail here with a clear message instead of a later "undeclared
 * identifier" error at the first use of __CudaLibName. */
#error unsupported Android architecture
#endif
#else
static const char __CudaLibName[] = "libcuda.so.1";
#endif
|
|
|
|
typedef void *CUDADRIVER;
|
|
|
|
static CUresult LOAD_LIBRARY(CUDADRIVER *pInstance)
|
|
{
|
|
*pInstance = dlopen(__CudaLibName, RTLD_NOW);
|
|
|
|
if (*pInstance == NULL)
|
|
{
|
|
printf("dlopen \"%s\" failed!\n", __CudaLibName);
|
|
return CUDA_ERROR_UNKNOWN;
|
|
}
|
|
|
|
return CUDA_SUCCESS;
|
|
}
|
|
|
|
/*
 * Shared body of the GET_PROC_EX* macros (dlsym flavour).
 *
 * Looks up `symbol` (a string literal) in the library handle held by the
 * local variable CudaDrvLib of the enclosing function and stores the result,
 * cast to `type *`, in `alias`.  When the symbol is missing and `required`
 * is non-zero, a diagnostic is printed and the ENCLOSING FUNCTION returns
 * CUDA_ERROR_UNKNOWN.
 *
 * The body is wrapped in do { } while (0) so that an invocation followed by
 * ';' is exactly one statement and stays correct under an unbraced if/else.
 */
#define GET_PROC_SYMBOL(type, symbol, alias, required)                        \
    do {                                                                      \
        alias = (type *)dlsym(CudaDrvLib, symbol);                            \
        if (alias == NULL && (required)) {                                    \
            printf("Failed to find required function \"%s\" in %s\n",         \
                   symbol, __CudaLibName);                                    \
            return CUDA_ERROR_UNKNOWN;                                        \
        }                                                                     \
    } while (0)

/* Resolve the unversioned symbol "<name>" into `alias` (type t<name> *). */
#define GET_PROC_EX(name, alias, required)                                    \
    GET_PROC_SYMBOL(t##name, #name, alias, required)

/* Resolve the versioned symbol "<name>_v2" into `alias` (type t<name> *). */
#define GET_PROC_EX_V2(name, alias, required)                                 \
    GET_PROC_SYMBOL(t##name, STRINGIFY(name##_v2), alias, required)

/* Resolve the versioned symbol "<name>_v3" into `alias` (type t<name> *). */
#define GET_PROC_EX_V3(name, alias, required)                                 \
    GET_PROC_SYMBOL(t##name, STRINGIFY(name##_v3), alias, required)
|
|
|
|
#else
|
|
#error unsupported platform
|
|
#endif
|
|
|
|
/*
 * Evaluates `call` once; if it yields anything other than CUDA_SUCCESS the
 * enclosing function returns that value immediately.
 */
#define CHECKED_CALL(call)                                \
    do {                                                  \
        CUresult checkedCallStatus_ = (call);             \
        if (checkedCallStatus_ != CUDA_SUCCESS)           \
            return checkedCallStatus_;                    \
    } while (0)
|
|
|
|
/*
 * Convenience wrappers: the global function pointer has the same name as the
 * driver symbol, so `name` is passed as both symbol and destination.
 */

/* Missing symbol => enclosing function returns CUDA_ERROR_UNKNOWN. */
#define GET_PROC_REQUIRED(name)  GET_PROC_EX(name, name, 1)

/* Missing symbol => pointer is left NULL, no error. */
#define GET_PROC_OPTIONAL(name)  GET_PROC_EX(name, name, 0)

/* Default lookup is a required one. */
#define GET_PROC(name)           GET_PROC_REQUIRED(name)

/* Required lookups of the "<name>_v2" / "<name>_v3" symbols. */
#define GET_PROC_V2(name)        GET_PROC_EX_V2(name, name, 1)
#define GET_PROC_V3(name)        GET_PROC_EX_V3(name, name, 1)
|
|
|
|
/*
 * Loads the CUDA driver library, initializes the driver and fills in the
 * global function pointers declared at the top of this file.
 *
 * Flags        forwarded unchanged to the driver's own cuInit.
 * cudaVersion  API version requested by the caller; together with the
 *              compile-time __CUDA_API_VERSION it selects whether the
 *              "_v2"/"_v3" variants of versioned entry points are bound.
 *
 * Returns CUDA_SUCCESS when every required entry point was resolved.
 * Returns CUDA_ERROR_UNKNOWN when the library cannot be loaded or a required
 * symbol is missing, or the error code reported by the driver's cuInit /
 * cuDriverGetVersion.
 *
 * Every GET_PROC* invocation below may return from this function.  The
 * library handle is never released: the resolved pointers refer into it.
 */
CUresult CUDAAPI cuInit(unsigned int Flags, int cudaVersion)
{
    CUDADRIVER CudaDrvLib;
    int driverVer = 1000;

    CHECKED_CALL(LOAD_LIBRARY(&CudaDrvLib));

    // cuInit is required; alias it to _cuInit
    GET_PROC_EX(cuInit, _cuInit, 1);
    CHECKED_CALL(_cuInit(Flags));

    // available since 2.2. if not present, version 1.0 is assumed
    GET_PROC_OPTIONAL(cuDriverGetVersion);

    if (cuDriverGetVersion)
    {
        CHECKED_CALL(cuDriverGetVersion(&driverVer));
    }

    // fetch all function pointers
    GET_PROC(cuDeviceGet);
    GET_PROC(cuDeviceGetCount);
    GET_PROC(cuDeviceGetName);
    GET_PROC(cuDeviceComputeCapability);
    GET_PROC(cuDeviceGetProperties);
    GET_PROC(cuDeviceGetAttribute);
    GET_PROC(cuGetErrorString);
    GET_PROC(cuCtxDestroy);
    GET_PROC(cuCtxAttach);
    GET_PROC(cuCtxDetach);
    GET_PROC(cuCtxPushCurrent);
    GET_PROC(cuCtxPopCurrent);
    GET_PROC(cuCtxGetDevice);
    GET_PROC(cuCtxSynchronize);
    GET_PROC(cuCtxSetLimit);
    GET_PROC(cuCtxGetCacheConfig);
    GET_PROC(cuCtxSetCacheConfig);
    GET_PROC(cuCtxGetApiVersion);
    GET_PROC(cuModuleLoad);
    GET_PROC(cuModuleLoadData);
    GET_PROC(cuModuleUnload);
    GET_PROC(cuModuleGetFunction);
    GET_PROC(cuModuleGetTexRef);
    GET_PROC(cuMemFreeHost);
    GET_PROC(cuMemHostAlloc);
    GET_PROC(cuFuncSetBlockShape);
    GET_PROC(cuFuncSetSharedSize);
    GET_PROC(cuFuncGetAttribute);
    GET_PROC(cuArrayDestroy);
    GET_PROC(cuTexRefCreate);
    GET_PROC(cuTexRefDestroy);
    GET_PROC(cuTexRefSetArray);
    GET_PROC(cuTexRefSetFormat);
    GET_PROC(cuTexRefSetAddressMode);
    GET_PROC(cuTexRefSetFilterMode);
    GET_PROC(cuTexRefSetFlags);
    GET_PROC(cuTexRefGetArray);
    GET_PROC(cuTexRefGetAddressMode);
    GET_PROC(cuTexRefGetFilterMode);
    GET_PROC(cuTexRefGetFormat);
    GET_PROC(cuTexRefGetFlags);
    GET_PROC(cuParamSetSize);
    GET_PROC(cuParamSeti);
    GET_PROC(cuParamSetf);
    GET_PROC(cuParamSetv);
    GET_PROC(cuParamSetTexRef);
    GET_PROC(cuLaunch);
    GET_PROC(cuLaunchGrid);
    GET_PROC(cuLaunchGridAsync);
    GET_PROC(cuEventCreate);
    GET_PROC(cuEventRecord);
    GET_PROC(cuEventQuery);
    GET_PROC(cuEventSynchronize);
    GET_PROC(cuEventDestroy);
    GET_PROC(cuEventElapsedTime);
    GET_PROC(cuStreamCreate);
    GET_PROC(cuStreamWaitEvent);
    GET_PROC(cuStreamAddCallback);
    GET_PROC(cuStreamQuery);
    GET_PROC(cuStreamSynchronize);
    GET_PROC(cuStreamDestroy);

    // These are CUDA 5.0 new functions
    if (driverVer >= 5000)
    {
        GET_PROC(cuMipmappedArrayCreate);
        GET_PROC(cuMipmappedArrayDestroy);
        GET_PROC(cuMipmappedArrayGetLevel);
    }

    // These are CUDA 4.2 new functions
    if (driverVer >= 4020)
    {
        GET_PROC(cuFuncSetSharedMemConfig);
        GET_PROC(cuCtxGetSharedMemConfig);
        GET_PROC(cuCtxSetSharedMemConfig);
    }

    // These are CUDA 4.1 new functions
    if (cudaVersion >= 4010 && __CUDA_API_VERSION >= 4010)
    {
        GET_PROC(cuDeviceGetByPCIBusId);
        GET_PROC(cuDeviceGetPCIBusId);
        GET_PROC(cuIpcGetEventHandle);
        GET_PROC(cuIpcOpenEventHandle);
        GET_PROC(cuIpcGetMemHandle);
        GET_PROC(cuIpcOpenMemHandle);
        GET_PROC(cuIpcCloseMemHandle);
    }

    // These could be _v2 interfaces
    // (rebinds pointers already resolved to the unversioned symbols above)
    if (cudaVersion >= 4000 && __CUDA_API_VERSION >= 4000)
    {
        GET_PROC_V2(cuCtxDestroy);
        GET_PROC_V2(cuCtxPopCurrent);
        GET_PROC_V2(cuCtxPushCurrent);
        GET_PROC_V2(cuStreamDestroy);
        GET_PROC_V2(cuEventDestroy);
    }

    if (cudaVersion >= 3020 && __CUDA_API_VERSION >= 3020)
    {
        GET_PROC_V2(cuDeviceTotalMem);
        GET_PROC_V2(cuCtxCreate);
        GET_PROC_V2(cuModuleGetGlobal);
        GET_PROC_V2(cuMemGetInfo);
        GET_PROC_V2(cuMemAlloc);
        GET_PROC_V2(cuMemAllocPitch);
        GET_PROC_V2(cuMemFree);
        GET_PROC_V2(cuMemGetAddressRange);
        GET_PROC_V2(cuMemAllocHost);
        GET_PROC_V2(cuMemHostGetDevicePointer);
        GET_PROC_V2(cuMemcpyHtoD);
        GET_PROC_V2(cuMemcpyDtoH);
        GET_PROC_V2(cuMemcpyDtoD);
        GET_PROC_V2(cuMemcpyDtoA);
        GET_PROC_V2(cuMemcpyAtoD);
        GET_PROC_V2(cuMemcpyHtoA);
        GET_PROC_V2(cuMemcpyAtoH);
        GET_PROC_V2(cuMemcpyAtoA);
        GET_PROC_V2(cuMemcpy2D);
        GET_PROC_V2(cuMemcpy2DUnaligned);
        GET_PROC_V2(cuMemcpy3D);
        GET_PROC_V2(cuMemcpyHtoDAsync);
        GET_PROC_V2(cuMemcpyDtoHAsync);
        GET_PROC_V2(cuMemcpyHtoAAsync);
        GET_PROC_V2(cuMemcpyAtoHAsync);
        GET_PROC_V2(cuMemcpy2DAsync);
        GET_PROC_V2(cuMemcpy3DAsync);
        GET_PROC_V2(cuMemsetD8);
        GET_PROC_V2(cuMemsetD16);
        GET_PROC_V2(cuMemsetD32);
        GET_PROC_V2(cuMemsetD2D8);
        GET_PROC_V2(cuMemsetD2D16);
        GET_PROC_V2(cuMemsetD2D32);
        GET_PROC_V2(cuArrayCreate);
        GET_PROC_V2(cuArrayGetDescriptor);
        GET_PROC_V2(cuArray3DCreate);
        GET_PROC_V2(cuArray3DGetDescriptor);
        GET_PROC_V2(cuTexRefSetAddress);
        GET_PROC_V2(cuTexRefGetAddress);

        if (cudaVersion >= 4010 && __CUDA_API_VERSION >= 4010)
        {
            GET_PROC_V3(cuTexRefSetAddress2D);
        }
        else
        {
            GET_PROC_V2(cuTexRefSetAddress2D);
        }
    }
    else
    {
        // versions earlier than 3020
        GET_PROC(cuDeviceTotalMem);
        GET_PROC(cuCtxCreate);
        GET_PROC(cuModuleGetGlobal);
        GET_PROC(cuMemGetInfo);
        GET_PROC(cuMemAlloc);
        GET_PROC(cuMemAllocPitch);
        GET_PROC(cuMemFree);
        GET_PROC(cuMemGetAddressRange);
        GET_PROC(cuMemAllocHost);
        GET_PROC(cuMemHostGetDevicePointer);
        GET_PROC(cuMemcpyHtoD);
        GET_PROC(cuMemcpyDtoH);
        GET_PROC(cuMemcpyDtoD);
        GET_PROC(cuMemcpyDtoA);
        GET_PROC(cuMemcpyAtoD);
        GET_PROC(cuMemcpyHtoA);
        GET_PROC(cuMemcpyAtoH);
        GET_PROC(cuMemcpyAtoA);
        GET_PROC(cuMemcpy2D);
        GET_PROC(cuMemcpy2DUnaligned);
        GET_PROC(cuMemcpy3D);
        GET_PROC(cuMemcpyHtoDAsync);
        GET_PROC(cuMemcpyDtoHAsync);
        GET_PROC(cuMemcpyHtoAAsync);
        GET_PROC(cuMemcpyAtoHAsync);
        GET_PROC(cuMemcpy2DAsync);
        GET_PROC(cuMemcpy3DAsync);
        GET_PROC(cuMemsetD8);
        GET_PROC(cuMemsetD16);
        GET_PROC(cuMemsetD32);
        GET_PROC(cuMemsetD2D8);
        GET_PROC(cuMemsetD2D16);
        GET_PROC(cuMemsetD2D32);
        GET_PROC(cuArrayCreate);
        GET_PROC(cuArrayGetDescriptor);
        GET_PROC(cuArray3DCreate);
        GET_PROC(cuArray3DGetDescriptor);
        GET_PROC(cuTexRefSetAddress);
        GET_PROC(cuTexRefSetAddress2D);
        GET_PROC(cuTexRefGetAddress);
    }

    // The following functions are specific to CUDA versions
    if (driverVer >= 4000)
    {
        GET_PROC(cuCtxSetCurrent);
        GET_PROC(cuCtxGetCurrent);
        GET_PROC(cuMemHostRegister);
        GET_PROC(cuMemHostUnregister);
        GET_PROC(cuMemcpy);
        GET_PROC(cuMemcpyPeer);
        GET_PROC(cuLaunchKernel);
        GET_PROC(cuProfilerStop);
    }

    if (driverVer >= 3010)
    {
        GET_PROC(cuModuleGetSurfRef);
        GET_PROC(cuSurfRefSetArray);
        GET_PROC(cuSurfRefGetArray);
        // cuCtxSetLimit is already resolved (as required) in the
        // unconditional list above, so it is not looked up again here.
        GET_PROC(cuCtxGetLimit);
    }

    if (driverVer >= 3000)
    {
        // Bind the same symbol generation as the other copy entry points:
        // with the 3.2+ API every sibling cuMemcpy* above uses "_v2".
        // NOTE(review): assumes tcuMemcpyDtoDAsync has the 3.2-style
        // signature when __CUDA_API_VERSION >= 3020 (cuda.h maps
        // cuMemcpyDtoDAsync to cuMemcpyDtoDAsync_v2) -- confirm against the
        // dynlink header.
        if (cudaVersion >= 3020 && __CUDA_API_VERSION >= 3020)
        {
            GET_PROC_V2(cuMemcpyDtoDAsync);
        }
        else
        {
            GET_PROC(cuMemcpyDtoDAsync);
        }

        GET_PROC(cuFuncSetCacheConfig);
#ifdef CUDA_INIT_D3D11
        GET_PROC(cuD3D11GetDevice);
        GET_PROC(cuD3D11CtxCreate);
        GET_PROC(cuGraphicsD3D11RegisterResource);
#endif
        GET_PROC(cuGraphicsUnregisterResource);
        GET_PROC(cuGraphicsSubResourceGetMappedArray);

        if (cudaVersion >= 3020 && __CUDA_API_VERSION >= 3020)
        {
            GET_PROC_V2(cuGraphicsResourceGetMappedPointer);
        }
        else
        {
            GET_PROC(cuGraphicsResourceGetMappedPointer);
        }

        GET_PROC(cuGraphicsResourceSetMapFlags);
        GET_PROC(cuGraphicsMapResources);
        GET_PROC(cuGraphicsUnmapResources);
        GET_PROC(cuGetExportTable);
    }

    if (driverVer >= 2030)
    {
        GET_PROC(cuMemHostGetFlags);
#ifdef CUDA_INIT_D3D10
        GET_PROC(cuD3D10GetDevice);
        GET_PROC(cuD3D10CtxCreate);
        GET_PROC(cuGraphicsD3D10RegisterResource);
#endif
        // cuGraphicsGLRegisterBuffer / cuGraphicsGLRegisterImage are
        // resolved in the driverVer >= 2010 block below, which also runs
        // for every driver that reaches this block.
    }

    if (driverVer >= 2010)
    {
        GET_PROC(cuModuleLoadDataEx);
        GET_PROC(cuModuleLoadFatBinary);
#ifdef CUDA_INIT_OPENGL
        GET_PROC(cuGLCtxCreate);
        GET_PROC(cuGraphicsGLRegisterBuffer);
        GET_PROC(cuGraphicsGLRegisterImage);
        // Same platform test as the declaration of cuWGLGetDevice, so the
        // pointer is never declared without also being resolved.
#  if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
        GET_PROC(cuWGLGetDevice);
#  endif
#endif
#ifdef CUDA_INIT_D3D9
        GET_PROC(cuD3D9GetDevice);
        GET_PROC(cuD3D9CtxCreate);
        GET_PROC(cuGraphicsD3D9RegisterResource);
#endif
    }

    return CUDA_SUCCESS;
}
|