mirror of
https://github.com/NVIDIA/cuda-samples.git
synced 2025-08-23 15:10:31 +08:00
Merge cuda_a_dev 13.0 changes to master
This commit is contained in:
commit
148014e709
58
CHANGELOG.md
58
CHANGELOG.md
@ -1,5 +1,63 @@
|
||||
## Changelog
|
||||
|
||||
### CUDA 13.0
|
||||
* Updated the samples that used "cudaDeviceProp" fields which are deprecated and removed in CUDA 13.0, replacing those fields with the equivalent attributes queried through "cudaDeviceGetAttribute":
|
||||
* Deprecated "cudaDeviceProp" fields
|
||||
`int clockRate; // - Replaced with "cudaDevAttrClockRate"`
|
||||
`int deviceOverlap; // - Replaced with "cudaDevAttrGpuOverlap"`
|
||||
`int kernelExecTimeoutEnabled; // - Replaced with "cudaDevAttrKernelExecTimeout"`
|
||||
`int computeMode; // - Replaced with "cudaDevAttrComputeMode"`
|
||||
`int memoryClockRate; // - Replaced with "cudaDevAttrMemoryClockRate"`
|
||||
`int cooperativeMultiDeviceLaunch; // - Deprecated, cudaLaunchCooperativeKernelMultiDevice is deprecated.`
|
||||
* `0_Introduction`
|
||||
* `UnifiedMemoryStreams`
|
||||
* `simpleHyperQ`
|
||||
* `simpleIPC`
|
||||
* `simpleMultiCopy`
|
||||
* `systemWideAtomics`
|
||||
* `1_Utilities`
|
||||
* `deviceQuery`
|
||||
* `2_Concepts_and_Techniques`
|
||||
* `streamOrderedAllocationIPC`
|
||||
* `4_CUDA_Libraries`
|
||||
* `simpleCUBLASXT`
|
||||
* `5_Domain_Specific`
|
||||
* `simpleVulkan`
|
||||
* `vulkanImageCUDA`
|
||||
* Updated the samples using the CUDA driver API "cuCtxCreate" by adding the "CUctxCreateParams" parameter, as "cuCtxCreate" is updated to "cuCtxCreate_v4" by default in CUDA 13.0:
|
||||
* `Common`
|
||||
* `nvrtc_helper.h`
|
||||
* `0_Introduction`
|
||||
* `UnifiedMemoryStreams`
|
||||
* `matrixMulDrv`
|
||||
* `simpleTextureDrv`
|
||||
* `vectorAddDrv`
|
||||
* `vectorAddMMAP`
|
||||
* `2_Concepts_and_Techniques`
|
||||
* `EGLStream_CUDA_CrossGPU`
|
||||
* `EGLStream_CUDA_Interop`
|
||||
* `threadMigration`
|
||||
* `3_CUDA_Features`
|
||||
* `graphMemoryFootprint`
|
||||
* `memMapIPCDrv`
|
||||
* `4_CUDA_Libraries`
|
||||
* `jitLto`
|
||||
* `7_libNVVM`
|
||||
* `device-side-launch`
|
||||
* `simple`
|
||||
* `uvmlite`
|
||||
* Updated the sample using the CUDA APIs "cudaGraphAddNode"/"cudaStreamGetCaptureInfo" by adding the "cudaGraphEdgeData" pointer parameter, as they are updated to "cudaGraphAddNode_v2"/"cudaStreamGetCaptureInfo_v3" by default in CUDA 13.0:
|
||||
* `3_CUDA_Features`
|
||||
* `graphConditionalNodes`
|
||||
* Updated the samples using the CUDA APIs "cudaMemAdvise"/"cudaMemPrefetchAsync" by changing the parameter "int device" to "cudaMemLocation location", as they are updated to "cudaMemAdvise_v2"/"cudaMemPrefetchAsync_v2" by default in CUDA 13.0:
|
||||
* `4_CUDA_Libraries`
|
||||
* `conjugateGradientMultiDeviceCG`
|
||||
* `6_Performance`
|
||||
* `UnifiedMemoryPerf`
|
||||
* Replaced "thrust::identity<uint>()" with "cuda::std::identity()", as "thrust::identity" is deprecated in CUDA 13.0:
|
||||
* `2_Concepts_and_Techniques`
|
||||
* `segmentationTreeThrust`
|
||||
|
||||
### CUDA 12.9
|
||||
* Updated toolchain for cross-compilation for Tegra Linux platforms.
|
||||
* Added `run_tests.py` utility to exercise all samples. See README.md for details.
|
||||
|
@ -14,7 +14,7 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
set(CMAKE_CUDA_STANDARD 17)
|
||||
set(CMAKE_CUDA_STANDARD_REQUIRED ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -187,6 +187,7 @@ CUmodule loadCUBIN(char *cubin, int argc, char **argv) {
|
||||
CUcontext context;
|
||||
int major = 0, minor = 0;
|
||||
char deviceName[256];
|
||||
CUctxCreateParams ctxCreateParams = {};
|
||||
|
||||
// Picks the best CUDA device available
|
||||
CUdevice cuDevice = findCudaDeviceDRV(argc, (const char **)argv);
|
||||
@ -200,7 +201,7 @@ CUmodule loadCUBIN(char *cubin, int argc, char **argv) {
|
||||
printf("> GPU Device has SM %d.%d compute capability\n", major, minor);
|
||||
|
||||
checkCudaErrors(cuInit(0));
|
||||
checkCudaErrors(cuCtxCreate(&context, 0, cuDevice));
|
||||
checkCudaErrors(cuCtxCreate(&context, &ctxCreateParams, 0, cuDevice));
|
||||
|
||||
checkCudaErrors(cuModuleLoadData(&module, cubin));
|
||||
free(cubin);
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -247,7 +247,9 @@ int main(int argc, char **argv)
|
||||
exit(EXIT_WAIVED);
|
||||
}
|
||||
|
||||
if (device_prop.computeMode == cudaComputeModeProhibited) {
|
||||
int computeMode;
|
||||
checkCudaErrors(cudaDeviceGetAttribute(&computeMode, cudaDevAttrComputeMode, dev_id));
|
||||
if (computeMode == cudaComputeModeProhibited) {
|
||||
// This sample requires being run with a default or process exclusive mode
|
||||
fprintf(stderr,
|
||||
"This sample requires a device in either default or process "
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
else()
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -268,9 +268,10 @@ void randomInit(float *data, int size)
|
||||
|
||||
static int initCUDA(int argc, char **argv, CUfunction *pMatrixMul, int *blk_size)
|
||||
{
|
||||
CUfunction cuFunction = 0;
|
||||
int major = 0, minor = 0;
|
||||
char deviceName[100];
|
||||
CUfunction cuFunction = 0;
|
||||
int major = 0, minor = 0;
|
||||
char deviceName[100];
|
||||
CUctxCreateParams ctxCreateParams = {};
|
||||
|
||||
cuDevice = findCudaDeviceDRV(argc, (const char **)argv);
|
||||
|
||||
@ -283,7 +284,7 @@ static int initCUDA(int argc, char **argv, CUfunction *pMatrixMul, int *blk_size
|
||||
checkCudaErrors(cuDeviceTotalMem(&totalGlobalMem, cuDevice));
|
||||
printf(" Total amount of global memory: %llu bytes\n", (long long unsigned int)totalGlobalMem);
|
||||
|
||||
checkCudaErrors(cuCtxCreate(&cuContext, 0, cuDevice));
|
||||
checkCudaErrors(cuCtxCreate(&cuContext, &ctxCreateParams, 0, cuDevice));
|
||||
|
||||
// first search for the module path before we load the results
|
||||
std::string module_path;
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -80,19 +80,20 @@ static void check(CUresult result, char const *const func, const char *const fil
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
printf("simpleDrvRuntime..\n");
|
||||
int N = 50000, devID = 0;
|
||||
size_t size = N * sizeof(float);
|
||||
CUdevice cuDevice;
|
||||
CUfunction vecAdd_kernel;
|
||||
CUmodule cuModule = 0;
|
||||
CUcontext cuContext;
|
||||
int N = 50000, devID = 0;
|
||||
size_t size = N * sizeof(float);
|
||||
CUdevice cuDevice;
|
||||
CUfunction vecAdd_kernel;
|
||||
CUmodule cuModule = 0;
|
||||
CUcontext cuContext;
|
||||
CUctxCreateParams ctxCreateParams = {};
|
||||
|
||||
// Initialize
|
||||
checkCudaDrvErrors(cuInit(0));
|
||||
|
||||
cuDevice = findCudaDevice(argc, (const char **)argv);
|
||||
// Create context
|
||||
checkCudaDrvErrors(cuCtxCreate(&cuContext, 0, cuDevice));
|
||||
checkCudaDrvErrors(cuCtxCreate(&cuContext, &ctxCreateParams, 0, cuDevice));
|
||||
|
||||
// first search for the module path before we load the results
|
||||
string module_path;
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -127,6 +127,10 @@ int main(int argc, char **argv)
|
||||
checkCudaErrors(cudaGetDevice(&cuda_device));
|
||||
checkCudaErrors(cudaGetDeviceProperties(&deviceProp, cuda_device));
|
||||
|
||||
// Get device clock rate
|
||||
int clockRate;
|
||||
checkCudaErrors(cudaDeviceGetAttribute(&clockRate, cudaDevAttrClockRate, cuda_device));
|
||||
|
||||
// HyperQ is available in devices of Compute Capability 3.5 and higher
|
||||
if (deviceProp.major < 3 || (deviceProp.major == 3 && deviceProp.minor < 5)) {
|
||||
if (deviceProp.concurrentKernels == 0) {
|
||||
@ -170,9 +174,9 @@ int main(int argc, char **argv)
|
||||
#if defined(__arm__) || defined(__aarch64__)
|
||||
// the kernel takes more time than the channel reset time on arm archs, so to
|
||||
// prevent hangs reduce time_clocks.
|
||||
clock_t time_clocks = (clock_t)(kernel_time * (deviceProp.clockRate / 100));
|
||||
clock_t time_clocks = (clock_t)(kernel_time * (clockRate / 100));
|
||||
#else
|
||||
clock_t time_clocks = (clock_t)(kernel_time * deviceProp.clockRate);
|
||||
clock_t time_clocks = (clock_t)(kernel_time * clockRate);
|
||||
#endif
|
||||
clock_t total_clocks = 0;
|
||||
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -247,7 +247,9 @@ static void parentProcess(char *app)
|
||||
}
|
||||
// This sample requires two processes accessing each device, so we need
|
||||
// to ensure exclusive or prohibited mode is not set
|
||||
if (prop.computeMode != cudaComputeModeDefault) {
|
||||
int computeMode;
|
||||
checkCudaErrors(cudaDeviceGetAttribute(&computeMode, cudaDevAttrComputeMode, i));
|
||||
if (computeMode != cudaComputeModeDefault) {
|
||||
printf("Device %d is in an unsupported compute mode for this sample\n", i);
|
||||
continue;
|
||||
}
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -218,9 +218,11 @@ int main(int argc, char *argv[])
|
||||
|
||||
printf("\n");
|
||||
printf("Relevant properties of this CUDA device\n");
|
||||
int canOverlap;
|
||||
checkCudaErrors(cudaDeviceGetAttribute(&canOverlap, cudaDevAttrGpuOverlap, cuda_device));
|
||||
printf("(%s) Can overlap one CPU<>GPU data transfer with GPU kernel execution "
|
||||
"(device property \"deviceOverlap\")\n",
|
||||
deviceProp.deviceOverlap ? "X" : " ");
|
||||
"(device property \"cudaDevAttrGpuOverlap\")\n",
|
||||
canOverlap ? "X" : " ");
|
||||
// printf("(%s) Can execute several GPU kernels simultaneously (compute
|
||||
// capability >= 2.0)\n", deviceProp.major >= 2 ? "X": " ");
|
||||
printf("(%s) Can overlap two CPU<>GPU data transfers with GPU kernel execution\n"
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -309,10 +309,11 @@ void runTest(int argc, char **argv)
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
static CUresult initCUDA(int argc, char **argv, CUfunction *transform)
|
||||
{
|
||||
CUfunction cuFunction = 0;
|
||||
int major = 0, minor = 0, devID = 0;
|
||||
char deviceName[100];
|
||||
string module_path;
|
||||
CUfunction cuFunction = 0;
|
||||
int major = 0, minor = 0, devID = 0;
|
||||
char deviceName[100];
|
||||
string module_path;
|
||||
CUctxCreateParams ctxCreateParams = {};
|
||||
|
||||
cuDevice = findCudaDeviceDRV(argc, (const char **)argv);
|
||||
|
||||
@ -322,7 +323,7 @@ static CUresult initCUDA(int argc, char **argv, CUfunction *transform)
|
||||
checkCudaErrors(cuDeviceGetName(deviceName, sizeof(deviceName), cuDevice));
|
||||
printf("> GPU Device has SM %d.%d compute capability\n", major, minor);
|
||||
|
||||
checkCudaErrors(cuCtxCreate(&cuContext, 0, cuDevice));
|
||||
checkCudaErrors(cuCtxCreate(&cuContext, &ctxCreateParams, 0, cuDevice));
|
||||
|
||||
// first search for the module_path before we try to load the results
|
||||
std::ostringstream fatbin;
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
else()
|
||||
|
@ -287,7 +287,9 @@ int main(int argc, char **argv)
|
||||
exit(EXIT_WAIVED);
|
||||
}
|
||||
|
||||
if (device_prop.computeMode == cudaComputeModeProhibited) {
|
||||
int computeMode;
|
||||
checkCudaErrors(cudaDeviceGetAttribute(&computeMode, cudaDevAttrComputeMode, dev_id));
|
||||
if (computeMode == cudaComputeModeProhibited) {
|
||||
// This sample requires being run with a default or process exclusive mode
|
||||
fprintf(stderr,
|
||||
"This sample requires a device in either default or process "
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -75,15 +75,16 @@ bool findModulePath(const char *, string &, char **, string &);
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
printf("Vector Addition (Driver API)\n");
|
||||
int N = 50000, devID = 0;
|
||||
size_t size = N * sizeof(float);
|
||||
int N = 50000, devID = 0;
|
||||
size_t size = N * sizeof(float);
|
||||
CUctxCreateParams ctxCreateParams = {};
|
||||
|
||||
// Initialize
|
||||
checkCudaErrors(cuInit(0));
|
||||
|
||||
cuDevice = findCudaDeviceDRV(argc, (const char **)argv);
|
||||
// Create context
|
||||
checkCudaErrors(cuCtxCreate(&cuContext, 0, cuDevice));
|
||||
checkCudaErrors(cuCtxCreate(&cuContext, &ctxCreateParams, 0, cuDevice));
|
||||
|
||||
// first search for the module path before we load the results
|
||||
string module_path;
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -141,10 +141,11 @@ int main(int argc, char **argv)
|
||||
|
||||
// Collect devices accessible by the mapping device (cuDevice) into the
|
||||
// backingDevices vector.
|
||||
vector<CUdevice> backingDevices = getBackingDevices(cuDevice);
|
||||
vector<CUdevice> backingDevices = getBackingDevices(cuDevice);
|
||||
CUctxCreateParams ctxCreateParams = {};
|
||||
|
||||
// Create context
|
||||
checkCudaErrors(cuCtxCreate(&cuContext, 0, cuDevice));
|
||||
checkCudaErrors(cuCtxCreate(&cuContext, &ctxCreateParams, 0, cuDevice));
|
||||
|
||||
// first search for the module path before we load the results
|
||||
string module_path;
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -59,6 +59,7 @@ template <class T> inline void getCudaAttribute(T *attribute, CUdevice_attribute
|
||||
|
||||
#endif /* CUDART_VERSION < 5000 */
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Program main
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
@ -128,14 +129,20 @@ int main(int argc, char **argv)
|
||||
deviceProp.multiProcessorCount,
|
||||
_ConvertSMVer2Cores(deviceProp.major, deviceProp.minor),
|
||||
_ConvertSMVer2Cores(deviceProp.major, deviceProp.minor) * deviceProp.multiProcessorCount);
|
||||
int clockRate;
|
||||
checkCudaErrors(cudaDeviceGetAttribute(&clockRate, cudaDevAttrClockRate, dev));
|
||||
printf(" GPU Max Clock rate: %.0f MHz (%0.2f "
|
||||
"GHz)\n",
|
||||
deviceProp.clockRate * 1e-3f,
|
||||
deviceProp.clockRate * 1e-6f);
|
||||
|
||||
clockRate * 1e-3f,
|
||||
clockRate * 1e-6f);
|
||||
#if CUDART_VERSION >= 5000
|
||||
// This is supported in CUDA 5.0 (runtime API device properties)
|
||||
printf(" Memory Clock rate: %.0f Mhz\n", deviceProp.memoryClockRate * 1e-3f);
|
||||
int memoryClockRate;
|
||||
#if CUDART_VERSION >= 13000
|
||||
checkCudaErrors(cudaDeviceGetAttribute(&memoryClockRate, cudaDevAttrMemoryClockRate, dev));
|
||||
#else
|
||||
memoryClockRate = deviceProp.memoryClockRate;
|
||||
#endif
|
||||
printf(" Memory Clock rate: %.0f Mhz\n", memoryClockRate * 1e-3f);
|
||||
printf(" Memory Bus Width: %d-bit\n", deviceProp.memoryBusWidth);
|
||||
|
||||
if (deviceProp.l2CacheSize) {
|
||||
@ -194,12 +201,15 @@ int main(int argc, char **argv)
|
||||
deviceProp.maxGridSize[2]);
|
||||
printf(" Maximum memory pitch: %zu bytes\n", deviceProp.memPitch);
|
||||
printf(" Texture alignment: %zu bytes\n", deviceProp.textureAlignment);
|
||||
int gpuOverlap;
|
||||
checkCudaErrors(cudaDeviceGetAttribute(&gpuOverlap, cudaDevAttrGpuOverlap, dev));
|
||||
printf(" Concurrent copy and kernel execution: %s with %d copy "
|
||||
"engine(s)\n",
|
||||
(deviceProp.deviceOverlap ? "Yes" : "No"),
|
||||
(gpuOverlap ? "Yes" : "No"),
|
||||
deviceProp.asyncEngineCount);
|
||||
printf(" Run time limit on kernels: %s\n",
|
||||
deviceProp.kernelExecTimeoutEnabled ? "Yes" : "No");
|
||||
int kernelExecTimeout;
|
||||
checkCudaErrors(cudaDeviceGetAttribute(&kernelExecTimeout, cudaDevAttrKernelExecTimeout, dev));
|
||||
printf(" Run time limit on kernels: %s\n", kernelExecTimeout ? "Yes" : "No");
|
||||
printf(" Integrated GPU sharing Host Memory: %s\n", deviceProp.integrated ? "Yes" : "No");
|
||||
printf(" Support host page-locked memory mapping: %s\n", deviceProp.canMapHostMemory ? "Yes" : "No");
|
||||
printf(" Alignment requirement for Surfaces: %s\n", deviceProp.surfaceAlignment ? "Yes" : "No");
|
||||
@ -213,8 +223,11 @@ int main(int argc, char **argv)
|
||||
printf(" Device supports Compute Preemption: %s\n",
|
||||
deviceProp.computePreemptionSupported ? "Yes" : "No");
|
||||
printf(" Supports Cooperative Kernel Launch: %s\n", deviceProp.cooperativeLaunch ? "Yes" : "No");
|
||||
// The property cooperativeMultiDeviceLaunch is deprecated in CUDA 13.0
|
||||
#if CUDART_VERSION < 13000
|
||||
printf(" Supports MultiDevice Co-op Kernel Launch: %s\n",
|
||||
deviceProp.cooperativeMultiDeviceLaunch ? "Yes" : "No");
|
||||
#endif
|
||||
printf(" Device PCI Domain ID / Bus ID / location ID: %d / %d / %d\n",
|
||||
deviceProp.pciDomainID,
|
||||
deviceProp.pciBusID,
|
||||
@ -230,8 +243,10 @@ int main(int argc, char **argv)
|
||||
"::cudaSetDevice() with this device)",
|
||||
"Unknown",
|
||||
NULL};
|
||||
int computeMode;
|
||||
checkCudaErrors(cudaDeviceGetAttribute(&computeMode, cudaDevAttrComputeMode, dev));
|
||||
printf(" Compute Mode:\n");
|
||||
printf(" < %s >\n", sComputeMode[deviceProp.computeMode]);
|
||||
printf(" < %s >\n", sComputeMode[computeMode]);
|
||||
}
|
||||
|
||||
// If there are 2 or more GPUs, query to determine whether RDMA is supported
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -190,8 +190,9 @@ done:
|
||||
|
||||
CUresult cudaDeviceCreateConsumer(test_cuda_consumer_s *cudaConsumer)
|
||||
{
|
||||
CUdevice device;
|
||||
CUresult status = CUDA_SUCCESS;
|
||||
CUdevice device;
|
||||
CUresult status = CUDA_SUCCESS;
|
||||
CUctxCreateParams ctxCreateParams = {};
|
||||
|
||||
if (CUDA_SUCCESS != (status = cuInit(0))) {
|
||||
printf("Failed to initialize CUDA\n");
|
||||
@ -203,7 +204,7 @@ CUresult cudaDeviceCreateConsumer(test_cuda_consumer_s *cudaConsumer)
|
||||
return status;
|
||||
}
|
||||
|
||||
if (CUDA_SUCCESS != (status = cuCtxCreate(&cudaConsumer->context, 0, device))) {
|
||||
if (CUDA_SUCCESS != (status = cuCtxCreate(&cudaConsumer->context, &ctxCreateParams, 0, device))) {
|
||||
printf("failed to create CUDA context\n");
|
||||
return status;
|
||||
}
|
||||
|
@ -182,8 +182,9 @@ done:
|
||||
|
||||
CUresult cudaDeviceCreateProducer(test_cuda_producer_s *cudaProducer)
|
||||
{
|
||||
CUdevice device;
|
||||
CUresult status = CUDA_SUCCESS;
|
||||
CUdevice device;
|
||||
CUresult status = CUDA_SUCCESS;
|
||||
CUctxCreateParams ctxCreateParams = {};
|
||||
|
||||
if (CUDA_SUCCESS != (status = cuInit(0))) {
|
||||
printf("Failed to initialize CUDA\n");
|
||||
@ -195,7 +196,7 @@ CUresult cudaDeviceCreateProducer(test_cuda_producer_s *cudaProducer)
|
||||
return status;
|
||||
}
|
||||
|
||||
if (CUDA_SUCCESS != (status = cuCtxCreate(&cudaProducer->context, 0, device))) {
|
||||
if (CUDA_SUCCESS != (status = cuCtxCreate(&cudaProducer->context, &ctxCreateParams, 0, device))) {
|
||||
printf("failed to create CUDA context\n");
|
||||
return status;
|
||||
}
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -302,7 +302,8 @@ CUresult cudaDeviceCreateConsumer(test_cuda_consumer_s *cudaConsumer, CUdevice d
|
||||
major,
|
||||
minor);
|
||||
|
||||
if (CUDA_SUCCESS != (status = cuCtxCreate(&cudaConsumer->context, 0, device))) {
|
||||
CUctxCreateParams ctxCreateParams = {};
|
||||
if (CUDA_SUCCESS != (status = cuCtxCreate(&cudaConsumer->context, &ctxCreateParams, 0, device))) {
|
||||
printf("failed to create CUDA context\n");
|
||||
return status;
|
||||
}
|
||||
|
@ -316,7 +316,8 @@ CUresult cudaDeviceCreateProducer(test_cuda_producer_s *cudaProducer, CUdevice d
|
||||
exit(2); // EXIT_WAIVED
|
||||
}
|
||||
|
||||
if (CUDA_SUCCESS != (status = cuCtxCreate(&cudaProducer->context, 0, device))) {
|
||||
CUctxCreateParams ctxCreateParams = {};
|
||||
if (CUDA_SUCCESS != (status = cuCtxCreate(&cudaProducer->context, &ctxCreateParams, 0, device))) {
|
||||
printf("failed to create CUDA context\n");
|
||||
return status;
|
||||
}
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -69,6 +69,9 @@
|
||||
#include <thrust/sort.h>
|
||||
#include <thrust/unique.h>
|
||||
|
||||
// for cuda::std::identity
|
||||
#include <cuda/std/functional>
|
||||
|
||||
// Sample framework includes.
|
||||
#include <helper_cuda.h>
|
||||
#include <helper_functions.h>
|
||||
@ -680,7 +683,7 @@ private:
|
||||
thrust::make_counting_iterator(validEdgesCount),
|
||||
dEdgesFlags,
|
||||
dVertices_,
|
||||
thrust::identity<uint>())
|
||||
cuda::std::identity())
|
||||
.get();
|
||||
|
||||
pools.uintEdges.put(dEdgesFlags);
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -322,7 +322,9 @@ static void parentProcess(char *app)
|
||||
}
|
||||
// This sample requires two processes accessing each device, so we need
|
||||
// to ensure exclusive or prohibited mode is not set
|
||||
if (prop.computeMode != cudaComputeModeDefault) {
|
||||
int computeMode;
|
||||
checkCudaErrors(cudaDeviceGetAttribute(&computeMode, cudaDevAttrComputeMode, i));
|
||||
if (computeMode != cudaComputeModeDefault) {
|
||||
printf("Device %d is in an unsupported compute mode for this sample\n", i);
|
||||
continue;
|
||||
}
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||
if(ENABLE_CUDA_DEBUG)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user