diff --git a/CHANGELOG.md b/CHANGELOG.md index 89d75dc0..f2a2f99f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,63 @@ ## Changelog +### CUDA 13.0 +* Updated the samples using the cudaDeviceProp fields which are deprecated and removed in CUDA 13.0, replacing the fields with the equivalents in "cudaDeviceGetAttribute": + * Deprecated "cudaDeviceProp" fields + `int clockRate; // - Replaced with "cudaDevAttrClockRate"` + `int deviceOverlap; // - Replaced with "cudaDevAttrGpuOverlap */` + `int kernelExecTimeoutEnabled; // - Replaced with "cudaDevAttrKernelExecTimeout` + `int computeMode; // - Replaced with "cudaDevAttrComputeMode" */` + `int memoryClockRate; // - Replaced with "cudaDevAttrMemoryClockRate"` + `int cooperativeMultiDeviceLaunch; // - Deprecated, cudaLaunchCooperativeKernelMultiDevice is deprecated.` + * `0_Introduction` + * `UnifiedMemoryStreams` + * `simpleHyperQ` + * `simpleIPC` + * `simpleMultiCopy` + * `systemWideAtomics` + * `1_Utilitie` + * `deviceQuery` + * `2_Concepts_and_Techniques` + * `streamOrderedAllocationIPC` + * `4_CUDA_Libraries` + * `simpleCUBLASXT` + * `5_Domain_Specific` + * `simpleVulkan` + * `vulkanImageCUDA` +* Updated the samples using the CUDA driver API "cuCtxCreate" with adding the parameter "CUctxCreateParams" as "cuCtxCreate" is updated to "cuCtxCreate_v4" by default in CUDA 13.0: + * `Common` + * `nvrtc_helper.h` + * `0_Introduction` + * `UnifiedMemoryStreams` + * `matrixMulDrv` + * `simpleTextureDrv` + * `vectorAddDrv` + * `vectorAddMMAP` + * `2_Concepts_and_Techniques` + * `EGLStream_CUDA_CrossGPU` + * `EGLStream_CUDA_Interop` + * `threadMigration` + * `3_CUDA_Features` + * `graphMemoryFootprint` + * `memMapIPCDrv` + * `4_CUDA_Libraries` + * `jitLto` + * `7_libNVVM` + * `device-side-launch` + * `simple` + * `uvmlite` +* Updated the sample using CUDA API "cudaGraphAddNode"/"cudaStreamGetCaptureInfo" with adding "cudaGraphEdgeData" pointer parameter as they are updated to "cudaGraphAddNode_v2"/"cudaStreamGetCaptureInfo_v3" by default in CUDA 13.0: + * `3_CUDA_Features` + * `graphConditionalNodes` +* Updated the samples using CUDA API "cudaMemAdvise"/"cudaMemPrefetchAsync" with changing the parameter "int device" to "cudaMemLocation location" as they are updated to "cudaMemAdvise_v2"/"cudaMemPrefetchAsyn_v2" by default in CUDA 13.0. + * `4_CUDA_Libraries` + * `conjugateGradientMultiDeviceCG` + * `6_Performance` + * `UnifiedMemoryPerf` +* Replaced "thrust::identity()" with "cuda::std::identity()" as it is deprecated in CUDA 13.0. + * `2_Concepts_and_Techniques` + * `segmentationTreeThrust` + ### CUDA 12.9 * Updated toolchain for cross-compilation for Tegra Linux platforms. * Added `run_tests.py` utility to exercise all samples. See README.md for details diff --git a/CMakeLists.txt b/CMakeLists.txt index f95e7aae..7108169c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -14,7 +14,7 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CUDA_STANDARD 17) set(CMAKE_CUDA_STANDARD_REQUIRED ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Common/nvrtc_helper.h b/Common/nvrtc_helper.h index 4544b194..aab3c681 100644 --- a/Common/nvrtc_helper.h +++ b/Common/nvrtc_helper.h @@ -187,6 +187,7 @@ CUmodule loadCUBIN(char *cubin, int argc, char **argv) { CUcontext context; int major = 0, minor = 0; char deviceName[256]; + CUctxCreateParams ctxCreateParams = {}; // Picks the best CUDA device available CUdevice cuDevice = findCudaDeviceDRV(argc, (const char **)argv); @@ -200,7 +201,7 @@ CUmodule loadCUBIN(char *cubin, int argc, char **argv) { printf("> GPU Device has SM %d.%d compute capability\n", major, minor); checkCudaErrors(cuInit(0)); - checkCudaErrors(cuCtxCreate(&context, 0, cuDevice)); + checkCudaErrors(cuCtxCreate(&context, &ctxCreateParams, 0, cuDevice)); checkCudaErrors(cuModuleLoadData(&module, cubin)); free(cubin); diff --git a/Samples/0_Introduction/UnifiedMemoryStreams/CMakeLists.txt b/Samples/0_Introduction/UnifiedMemoryStreams/CMakeLists.txt index 090cfe2c..15f95e2b 100644 --- a/Samples/0_Introduction/UnifiedMemoryStreams/CMakeLists.txt +++ b/Samples/0_Introduction/UnifiedMemoryStreams/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams.cu b/Samples/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams.cu index ca5cfcbd..cbede7d6 100644 --- a/Samples/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams.cu +++ b/Samples/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams.cu @@ -247,7 +247,9 @@ int main(int argc, char **argv) exit(EXIT_WAIVED); } - if (device_prop.computeMode == cudaComputeModeProhibited) { + int computeMode; + checkCudaErrors(cudaDeviceGetAttribute(&computeMode, cudaDevAttrComputeMode, dev_id)); + if (computeMode == cudaComputeModeProhibited) { // This sample requires being run with a default or process exclusive mode fprintf(stderr, "This sample requires a device in either default or process " diff --git a/Samples/0_Introduction/asyncAPI/CMakeLists.txt b/Samples/0_Introduction/asyncAPI/CMakeLists.txt index 6fed8cf7..9fa53322 100644 --- a/Samples/0_Introduction/asyncAPI/CMakeLists.txt +++ b/Samples/0_Introduction/asyncAPI/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/0_Introduction/clock/CMakeLists.txt b/Samples/0_Introduction/clock/CMakeLists.txt index 740f03e9..3143081e 100644 --- a/Samples/0_Introduction/clock/CMakeLists.txt +++ b/Samples/0_Introduction/clock/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/0_Introduction/clock_nvrtc/CMakeLists.txt b/Samples/0_Introduction/clock_nvrtc/CMakeLists.txt index a9ad566f..2a8caa6a 100644 --- a/Samples/0_Introduction/clock_nvrtc/CMakeLists.txt +++ b/Samples/0_Introduction/clock_nvrtc/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/0_Introduction/cudaOpenMP/CMakeLists.txt b/Samples/0_Introduction/cudaOpenMP/CMakeLists.txt index 886e0838..a2ee6c3b 100644 --- a/Samples/0_Introduction/cudaOpenMP/CMakeLists.txt +++ b/Samples/0_Introduction/cudaOpenMP/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/0_Introduction/fp16ScalarProduct/CMakeLists.txt b/Samples/0_Introduction/fp16ScalarProduct/CMakeLists.txt index 4d265d49..611f7d6b 100644 --- a/Samples/0_Introduction/fp16ScalarProduct/CMakeLists.txt +++ b/Samples/0_Introduction/fp16ScalarProduct/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) else() diff --git a/Samples/0_Introduction/matrixMul/CMakeLists.txt b/Samples/0_Introduction/matrixMul/CMakeLists.txt index d0acc448..7f6662cf 100644 --- a/Samples/0_Introduction/matrixMul/CMakeLists.txt +++ b/Samples/0_Introduction/matrixMul/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/0_Introduction/matrixMulDrv/CMakeLists.txt b/Samples/0_Introduction/matrixMulDrv/CMakeLists.txt index 85cad9d3..c819b54f 100644 --- a/Samples/0_Introduction/matrixMulDrv/CMakeLists.txt +++ b/Samples/0_Introduction/matrixMulDrv/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/0_Introduction/matrixMulDrv/matrixMulDrv.cpp b/Samples/0_Introduction/matrixMulDrv/matrixMulDrv.cpp index 8f3c83d3..815801b8 100644 --- a/Samples/0_Introduction/matrixMulDrv/matrixMulDrv.cpp +++ b/Samples/0_Introduction/matrixMulDrv/matrixMulDrv.cpp @@ -268,9 +268,10 @@ void randomInit(float *data, int size) static int initCUDA(int argc, char **argv, CUfunction *pMatrixMul, int *blk_size) { - CUfunction cuFunction = 0; - int major = 0, minor = 0; - char deviceName[100]; + CUfunction cuFunction = 0; + int major = 0, minor = 0; + char deviceName[100]; + CUctxCreateParams ctxCreateParams = {}; cuDevice = findCudaDeviceDRV(argc, (const char **)argv); @@ -283,7 +284,7 @@ static int initCUDA(int argc, char **argv, CUfunction *pMatrixMul, int *blk_size checkCudaErrors(cuDeviceTotalMem(&totalGlobalMem, cuDevice)); printf(" Total amount of global memory: %llu bytes\n", (long long unsigned int)totalGlobalMem); - checkCudaErrors(cuCtxCreate(&cuContext, 0, cuDevice)); + checkCudaErrors(cuCtxCreate(&cuContext, &ctxCreateParams, 0, cuDevice)); // first search for the module path before we load the results std::string module_path; diff --git a/Samples/0_Introduction/matrixMulDynlinkJIT/CMakeLists.txt b/Samples/0_Introduction/matrixMulDynlinkJIT/CMakeLists.txt index 6e4f044c..d46748da 100644 --- a/Samples/0_Introduction/matrixMulDynlinkJIT/CMakeLists.txt +++ b/Samples/0_Introduction/matrixMulDynlinkJIT/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/0_Introduction/matrixMul_nvrtc/CMakeLists.txt b/Samples/0_Introduction/matrixMul_nvrtc/CMakeLists.txt index 91148968..82eb3cee 100644 --- a/Samples/0_Introduction/matrixMul_nvrtc/CMakeLists.txt +++ b/Samples/0_Introduction/matrixMul_nvrtc/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/0_Introduction/mergeSort/CMakeLists.txt b/Samples/0_Introduction/mergeSort/CMakeLists.txt index 82583db8..a7f2eec3 100644 --- a/Samples/0_Introduction/mergeSort/CMakeLists.txt +++ b/Samples/0_Introduction/mergeSort/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/0_Introduction/simpleAWBarrier/CMakeLists.txt b/Samples/0_Introduction/simpleAWBarrier/CMakeLists.txt index b87c63d0..41047846 100644 --- a/Samples/0_Introduction/simpleAWBarrier/CMakeLists.txt +++ b/Samples/0_Introduction/simpleAWBarrier/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) diff --git a/Samples/0_Introduction/simpleAssert/CMakeLists.txt b/Samples/0_Introduction/simpleAssert/CMakeLists.txt index cf5e3729..e29c3e64 100644 --- a/Samples/0_Introduction/simpleAssert/CMakeLists.txt +++ b/Samples/0_Introduction/simpleAssert/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/0_Introduction/simpleAssert_nvrtc/CMakeLists.txt b/Samples/0_Introduction/simpleAssert_nvrtc/CMakeLists.txt index 7953acbe..18b68c2d 100644 --- a/Samples/0_Introduction/simpleAssert_nvrtc/CMakeLists.txt +++ b/Samples/0_Introduction/simpleAssert_nvrtc/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics/CMakeLists.txt b/Samples/0_Introduction/simpleAtomicIntrinsics/CMakeLists.txt index 594aeb1e..60b8eb1f 100644 --- a/Samples/0_Introduction/simpleAtomicIntrinsics/CMakeLists.txt +++ b/Samples/0_Introduction/simpleAtomicIntrinsics/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/CMakeLists.txt b/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/CMakeLists.txt index 0eae7168..870672f0 100644 --- a/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/CMakeLists.txt +++ b/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/0_Introduction/simpleAttributes/CMakeLists.txt b/Samples/0_Introduction/simpleAttributes/CMakeLists.txt index 92d61f7e..e202b797 100644 --- a/Samples/0_Introduction/simpleAttributes/CMakeLists.txt +++ b/Samples/0_Introduction/simpleAttributes/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/0_Introduction/simpleCUDA2GL/CMakeLists.txt b/Samples/0_Introduction/simpleCUDA2GL/CMakeLists.txt index fef94f01..d5b253d3 100644 --- a/Samples/0_Introduction/simpleCUDA2GL/CMakeLists.txt +++ b/Samples/0_Introduction/simpleCUDA2GL/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/0_Introduction/simpleCallback/CMakeLists.txt b/Samples/0_Introduction/simpleCallback/CMakeLists.txt index 34b22185..9969fd4e 100644 --- a/Samples/0_Introduction/simpleCallback/CMakeLists.txt +++ b/Samples/0_Introduction/simpleCallback/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/0_Introduction/simpleCooperativeGroups/CMakeLists.txt b/Samples/0_Introduction/simpleCooperativeGroups/CMakeLists.txt index 38729258..6d6a7feb 100644 --- a/Samples/0_Introduction/simpleCooperativeGroups/CMakeLists.txt +++ b/Samples/0_Introduction/simpleCooperativeGroups/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/0_Introduction/simpleCubemapTexture/CMakeLists.txt b/Samples/0_Introduction/simpleCubemapTexture/CMakeLists.txt index ea696b01..161af367 100644 --- a/Samples/0_Introduction/simpleCubemapTexture/CMakeLists.txt +++ b/Samples/0_Introduction/simpleCubemapTexture/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/0_Introduction/simpleDrvRuntime/CMakeLists.txt b/Samples/0_Introduction/simpleDrvRuntime/CMakeLists.txt index 197de448..c3589eed 100644 --- a/Samples/0_Introduction/simpleDrvRuntime/CMakeLists.txt +++ b/Samples/0_Introduction/simpleDrvRuntime/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/0_Introduction/simpleDrvRuntime/simpleDrvRuntime.cpp b/Samples/0_Introduction/simpleDrvRuntime/simpleDrvRuntime.cpp index 77bcf10a..d4499e00 100644 --- a/Samples/0_Introduction/simpleDrvRuntime/simpleDrvRuntime.cpp +++ b/Samples/0_Introduction/simpleDrvRuntime/simpleDrvRuntime.cpp @@ -80,19 +80,20 @@ static void check(CUresult result, char const *const func, const char *const fil int main(int argc, char **argv) { printf("simpleDrvRuntime..\n"); - int N = 50000, devID = 0; - size_t size = N * sizeof(float); - CUdevice cuDevice; - CUfunction vecAdd_kernel; - CUmodule cuModule = 0; - CUcontext cuContext; + int N = 50000, devID = 0; + size_t size = N * sizeof(float); + CUdevice cuDevice; + CUfunction vecAdd_kernel; + CUmodule cuModule = 0; + CUcontext cuContext; + CUctxCreateParams ctxCreateParams = {}; // Initialize checkCudaDrvErrors(cuInit(0)); cuDevice = findCudaDevice(argc, (const char **)argv); // Create context - checkCudaDrvErrors(cuCtxCreate(&cuContext, 0, cuDevice)); + checkCudaDrvErrors(cuCtxCreate(&cuContext, &ctxCreateParams, 0, cuDevice)); // first search for the module path before we load the results string module_path; diff --git a/Samples/0_Introduction/simpleHyperQ/CMakeLists.txt b/Samples/0_Introduction/simpleHyperQ/CMakeLists.txt index be63170f..4df82952 100644 --- a/Samples/0_Introduction/simpleHyperQ/CMakeLists.txt +++ b/Samples/0_Introduction/simpleHyperQ/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/0_Introduction/simpleHyperQ/simpleHyperQ.cu b/Samples/0_Introduction/simpleHyperQ/simpleHyperQ.cu index 2972d88b..2a1ac958 100644 --- a/Samples/0_Introduction/simpleHyperQ/simpleHyperQ.cu +++ b/Samples/0_Introduction/simpleHyperQ/simpleHyperQ.cu @@ -127,6 +127,10 @@ int main(int argc, char **argv) checkCudaErrors(cudaGetDevice(&cuda_device)); checkCudaErrors(cudaGetDeviceProperties(&deviceProp, cuda_device)); + // Get device clock rate + int clockRate; + checkCudaErrors(cudaDeviceGetAttribute(&clockRate, cudaDevAttrClockRate, cuda_device)); + // HyperQ is available in devices of Compute Capability 3.5 and higher if (deviceProp.major < 3 || (deviceProp.major == 3 && deviceProp.minor < 5)) { if (deviceProp.concurrentKernels == 0) { @@ -170,9 +174,9 @@ int main(int argc, char **argv) #if defined(__arm__) || defined(__aarch64__) // the kernel takes more time than the channel reset time on arm archs, so to // prevent hangs reduce time_clocks. - clock_t time_clocks = (clock_t)(kernel_time * (deviceProp.clockRate / 100)); + clock_t time_clocks = (clock_t)(kernel_time * (clockRate / 100)); #else - clock_t time_clocks = (clock_t)(kernel_time * deviceProp.clockRate); + clock_t time_clocks = (clock_t)(kernel_time * clockRate); #endif clock_t total_clocks = 0; diff --git a/Samples/0_Introduction/simpleIPC/CMakeLists.txt b/Samples/0_Introduction/simpleIPC/CMakeLists.txt index fdf21f87..51be28e7 100644 --- a/Samples/0_Introduction/simpleIPC/CMakeLists.txt +++ b/Samples/0_Introduction/simpleIPC/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/0_Introduction/simpleIPC/simpleIPC.cu b/Samples/0_Introduction/simpleIPC/simpleIPC.cu index ab59fc4d..29403ca5 100644 --- a/Samples/0_Introduction/simpleIPC/simpleIPC.cu +++ b/Samples/0_Introduction/simpleIPC/simpleIPC.cu @@ -247,7 +247,9 @@ static void parentProcess(char *app) } // This sample requires two processes accessing each device, so we need // to ensure exclusive or prohibited mode is not set - if (prop.computeMode != cudaComputeModeDefault) { + int computeMode; + checkCudaErrors(cudaDeviceGetAttribute(&computeMode, cudaDevAttrComputeMode, i)); + if (computeMode != cudaComputeModeDefault) { printf("Device %d is in an unsupported compute mode for this sample\n", i); continue; } diff --git a/Samples/0_Introduction/simpleLayeredTexture/CMakeLists.txt b/Samples/0_Introduction/simpleLayeredTexture/CMakeLists.txt index bae6dd8d..7d937956 100644 --- a/Samples/0_Introduction/simpleLayeredTexture/CMakeLists.txt +++ b/Samples/0_Introduction/simpleLayeredTexture/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/0_Introduction/simpleMPI/CMakeLists.txt b/Samples/0_Introduction/simpleMPI/CMakeLists.txt index 15466d28..5816bf48 100644 --- a/Samples/0_Introduction/simpleMPI/CMakeLists.txt +++ b/Samples/0_Introduction/simpleMPI/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/0_Introduction/simpleMultiCopy/CMakeLists.txt b/Samples/0_Introduction/simpleMultiCopy/CMakeLists.txt index f48c6e03..cc003b37 100644 --- a/Samples/0_Introduction/simpleMultiCopy/CMakeLists.txt +++ b/Samples/0_Introduction/simpleMultiCopy/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/0_Introduction/simpleMultiCopy/simpleMultiCopy.cu b/Samples/0_Introduction/simpleMultiCopy/simpleMultiCopy.cu index 20e0d450..de28f396 100644 --- a/Samples/0_Introduction/simpleMultiCopy/simpleMultiCopy.cu +++ b/Samples/0_Introduction/simpleMultiCopy/simpleMultiCopy.cu @@ -218,9 +218,11 @@ int main(int argc, char *argv[]) printf("\n"); printf("Relevant properties of this CUDA device\n"); + int canOverlap; + checkCudaErrors(cudaDeviceGetAttribute(&canOverlap, cudaDevAttrGpuOverlap, cuda_device)); printf("(%s) Can overlap one CPU<>GPU data transfer with GPU kernel execution " - "(device property \"deviceOverlap\")\n", - deviceProp.deviceOverlap ? "X" : " "); + "(device property \"cudaDevAttrGpuOverlap\")\n", + canOverlap ? "X" : " "); // printf("(%s) Can execute several GPU kernels simultaneously (compute // capability >= 2.0)\n", deviceProp.major >= 2 ? "X": " "); printf("(%s) Can overlap two CPU<>GPU data transfers with GPU kernel execution\n" diff --git a/Samples/0_Introduction/simpleMultiGPU/CMakeLists.txt b/Samples/0_Introduction/simpleMultiGPU/CMakeLists.txt index ac73641c..b6b6c170 100644 --- a/Samples/0_Introduction/simpleMultiGPU/CMakeLists.txt +++ b/Samples/0_Introduction/simpleMultiGPU/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/0_Introduction/simpleOccupancy/CMakeLists.txt b/Samples/0_Introduction/simpleOccupancy/CMakeLists.txt index 6859a157..a434f923 100644 --- a/Samples/0_Introduction/simpleOccupancy/CMakeLists.txt +++ b/Samples/0_Introduction/simpleOccupancy/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/0_Introduction/simpleP2P/CMakeLists.txt b/Samples/0_Introduction/simpleP2P/CMakeLists.txt index f0b82c6e..2ead08fa 100644 --- a/Samples/0_Introduction/simpleP2P/CMakeLists.txt +++ b/Samples/0_Introduction/simpleP2P/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/0_Introduction/simplePitchLinearTexture/CMakeLists.txt b/Samples/0_Introduction/simplePitchLinearTexture/CMakeLists.txt index 6d431f62..7273aabd 100644 --- a/Samples/0_Introduction/simplePitchLinearTexture/CMakeLists.txt +++ b/Samples/0_Introduction/simplePitchLinearTexture/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/0_Introduction/simplePrintf/CMakeLists.txt b/Samples/0_Introduction/simplePrintf/CMakeLists.txt index f3ec60b8..abfef0a5 100644 --- a/Samples/0_Introduction/simplePrintf/CMakeLists.txt +++ b/Samples/0_Introduction/simplePrintf/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/0_Introduction/simpleStreams/CMakeLists.txt b/Samples/0_Introduction/simpleStreams/CMakeLists.txt index dce86206..1bfbf47e 100644 --- a/Samples/0_Introduction/simpleStreams/CMakeLists.txt +++ b/Samples/0_Introduction/simpleStreams/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/0_Introduction/simpleSurfaceWrite/CMakeLists.txt b/Samples/0_Introduction/simpleSurfaceWrite/CMakeLists.txt index 5c65251e..2ba8e6b0 100644 --- a/Samples/0_Introduction/simpleSurfaceWrite/CMakeLists.txt +++ b/Samples/0_Introduction/simpleSurfaceWrite/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/0_Introduction/simpleTemplates/CMakeLists.txt b/Samples/0_Introduction/simpleTemplates/CMakeLists.txt index a821e7bb..253a1b1d 100644 --- a/Samples/0_Introduction/simpleTemplates/CMakeLists.txt +++ b/Samples/0_Introduction/simpleTemplates/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/0_Introduction/simpleTexture/CMakeLists.txt b/Samples/0_Introduction/simpleTexture/CMakeLists.txt index 688116ba..5c438915 100644 --- a/Samples/0_Introduction/simpleTexture/CMakeLists.txt +++ b/Samples/0_Introduction/simpleTexture/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/0_Introduction/simpleTexture3D/CMakeLists.txt b/Samples/0_Introduction/simpleTexture3D/CMakeLists.txt index 1917544b..bcc480f8 100644 --- a/Samples/0_Introduction/simpleTexture3D/CMakeLists.txt +++ b/Samples/0_Introduction/simpleTexture3D/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/0_Introduction/simpleTextureDrv/CMakeLists.txt b/Samples/0_Introduction/simpleTextureDrv/CMakeLists.txt index ed501b38..37b023ef 100644 --- a/Samples/0_Introduction/simpleTextureDrv/CMakeLists.txt +++ b/Samples/0_Introduction/simpleTextureDrv/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/0_Introduction/simpleTextureDrv/simpleTextureDrv.cpp b/Samples/0_Introduction/simpleTextureDrv/simpleTextureDrv.cpp index 322b7eb5..aa6b2627 100644 --- a/Samples/0_Introduction/simpleTextureDrv/simpleTextureDrv.cpp +++ b/Samples/0_Introduction/simpleTextureDrv/simpleTextureDrv.cpp @@ -309,10 +309,11 @@ void runTest(int argc, char **argv) //////////////////////////////////////////////////////////////////////////////// static CUresult initCUDA(int argc, char **argv, CUfunction *transform) { - CUfunction cuFunction = 0; - int major = 0, minor = 0, devID = 0; - char deviceName[100]; - string module_path; + CUfunction cuFunction = 0; + int major = 0, minor = 0, devID = 0; + char deviceName[100]; + string module_path; + CUctxCreateParams ctxCreateParams = {}; cuDevice = findCudaDeviceDRV(argc, (const char **)argv); @@ -322,7 +323,7 @@ static CUresult initCUDA(int argc, char **argv, CUfunction *transform) checkCudaErrors(cuDeviceGetName(deviceName, sizeof(deviceName), cuDevice)); printf("> GPU Device has SM %d.%d compute capability\n", major, minor); - checkCudaErrors(cuCtxCreate(&cuContext, 0, cuDevice)); + checkCudaErrors(cuCtxCreate(&cuContext, &ctxCreateParams, 0, cuDevice)); // first search for the module_path before we try to load the results std::ostringstream fatbin; diff --git a/Samples/0_Introduction/simpleVoteIntrinsics/CMakeLists.txt b/Samples/0_Introduction/simpleVoteIntrinsics/CMakeLists.txt index 1b151bcf..15d1b8f3 100644 --- a/Samples/0_Introduction/simpleVoteIntrinsics/CMakeLists.txt +++ b/Samples/0_Introduction/simpleVoteIntrinsics/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/0_Introduction/simpleZeroCopy/CMakeLists.txt b/Samples/0_Introduction/simpleZeroCopy/CMakeLists.txt index 1e93e05a..95f93999 100644 --- a/Samples/0_Introduction/simpleZeroCopy/CMakeLists.txt +++ b/Samples/0_Introduction/simpleZeroCopy/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/0_Introduction/systemWideAtomics/CMakeLists.txt b/Samples/0_Introduction/systemWideAtomics/CMakeLists.txt index 3e50470c..a820d170 100644 --- a/Samples/0_Introduction/systemWideAtomics/CMakeLists.txt +++ b/Samples/0_Introduction/systemWideAtomics/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) else() diff --git a/Samples/0_Introduction/systemWideAtomics/systemWideAtomics.cu b/Samples/0_Introduction/systemWideAtomics/systemWideAtomics.cu index 28e40ebc..121ba207 100644 --- a/Samples/0_Introduction/systemWideAtomics/systemWideAtomics.cu +++ b/Samples/0_Introduction/systemWideAtomics/systemWideAtomics.cu @@ -287,7 +287,9 @@ int main(int argc, char **argv) exit(EXIT_WAIVED); } - if (device_prop.computeMode == cudaComputeModeProhibited) { + int computeMode; + checkCudaErrors(cudaDeviceGetAttribute(&computeMode, cudaDevAttrComputeMode, dev_id)); + if (computeMode == cudaComputeModeProhibited) { // This sample requires being run with a default or process exclusive mode fprintf(stderr, "This sample requires a device in either default or process " diff --git a/Samples/0_Introduction/template/CMakeLists.txt b/Samples/0_Introduction/template/CMakeLists.txt index 4aa8c0dd..4f6a66a1 100644 --- a/Samples/0_Introduction/template/CMakeLists.txt +++ b/Samples/0_Introduction/template/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/0_Introduction/vectorAdd/CMakeLists.txt b/Samples/0_Introduction/vectorAdd/CMakeLists.txt index 4c6ef8cd..83b8b5fb 100644 --- a/Samples/0_Introduction/vectorAdd/CMakeLists.txt +++ b/Samples/0_Introduction/vectorAdd/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/0_Introduction/vectorAddDrv/CMakeLists.txt b/Samples/0_Introduction/vectorAddDrv/CMakeLists.txt index bf4b8255..c2ae197d 100644 --- a/Samples/0_Introduction/vectorAddDrv/CMakeLists.txt +++ b/Samples/0_Introduction/vectorAddDrv/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/0_Introduction/vectorAddDrv/vectorAddDrv.cpp b/Samples/0_Introduction/vectorAddDrv/vectorAddDrv.cpp index 16f6cbfd..e6a1a051 100644 --- a/Samples/0_Introduction/vectorAddDrv/vectorAddDrv.cpp +++ b/Samples/0_Introduction/vectorAddDrv/vectorAddDrv.cpp @@ -75,15 +75,16 @@ bool findModulePath(const char *, string &, char **, string &); int main(int argc, char **argv) { printf("Vector Addition (Driver API)\n"); - int N = 50000, devID = 0; - size_t size = N * sizeof(float); + int N = 50000, devID = 0; + size_t size = N * sizeof(float); + CUctxCreateParams ctxCreateParams = {}; // Initialize checkCudaErrors(cuInit(0)); cuDevice = findCudaDeviceDRV(argc, (const char **)argv); // Create context - checkCudaErrors(cuCtxCreate(&cuContext, 0, cuDevice)); + checkCudaErrors(cuCtxCreate(&cuContext, &ctxCreateParams, 0, cuDevice)); // first search for the module path before we load the results string module_path; diff --git a/Samples/0_Introduction/vectorAddMMAP/CMakeLists.txt b/Samples/0_Introduction/vectorAddMMAP/CMakeLists.txt index 73cbeca8..6941b45e 100644 --- a/Samples/0_Introduction/vectorAddMMAP/CMakeLists.txt +++ b/Samples/0_Introduction/vectorAddMMAP/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/0_Introduction/vectorAddMMAP/vectorAddMMAP.cpp b/Samples/0_Introduction/vectorAddMMAP/vectorAddMMAP.cpp index f0249e78..c5c28ff5 100644 --- a/Samples/0_Introduction/vectorAddMMAP/vectorAddMMAP.cpp +++ b/Samples/0_Introduction/vectorAddMMAP/vectorAddMMAP.cpp @@ -141,10 +141,11 @@ int main(int argc, char **argv) // Collect devices accessible by the mapping device (cuDevice) into the // backingDevices vector. - vector backingDevices = getBackingDevices(cuDevice); + vector backingDevices = getBackingDevices(cuDevice); + CUctxCreateParams ctxCreateParams = {}; // Create context - checkCudaErrors(cuCtxCreate(&cuContext, 0, cuDevice)); + checkCudaErrors(cuCtxCreate(&cuContext, &ctxCreateParams, 0, cuDevice)); // first search for the module path before we load the results string module_path; diff --git a/Samples/0_Introduction/vectorAdd_nvrtc/CMakeLists.txt b/Samples/0_Introduction/vectorAdd_nvrtc/CMakeLists.txt index 331d275b..b266274a 100644 --- a/Samples/0_Introduction/vectorAdd_nvrtc/CMakeLists.txt +++ b/Samples/0_Introduction/vectorAdd_nvrtc/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/1_Utilities/deviceQuery/CMakeLists.txt b/Samples/1_Utilities/deviceQuery/CMakeLists.txt index 4b308e9c..0726351f 100644 --- a/Samples/1_Utilities/deviceQuery/CMakeLists.txt +++ b/Samples/1_Utilities/deviceQuery/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/1_Utilities/deviceQuery/deviceQuery.cpp b/Samples/1_Utilities/deviceQuery/deviceQuery.cpp index 4deffb87..4d7da97b 100644 --- a/Samples/1_Utilities/deviceQuery/deviceQuery.cpp +++ b/Samples/1_Utilities/deviceQuery/deviceQuery.cpp @@ -59,6 +59,7 @@ template inline void getCudaAttribute(T *attribute, CUdevice_attribute #endif /* CUDART_VERSION < 5000 */ + //////////////////////////////////////////////////////////////////////////////// // Program main //////////////////////////////////////////////////////////////////////////////// @@ -128,14 +129,20 @@ int main(int argc, char **argv) deviceProp.multiProcessorCount, _ConvertSMVer2Cores(deviceProp.major, deviceProp.minor), _ConvertSMVer2Cores(deviceProp.major, deviceProp.minor) * deviceProp.multiProcessorCount); + int clockRate; + checkCudaErrors(cudaDeviceGetAttribute(&clockRate, cudaDevAttrClockRate, dev)); printf(" GPU Max Clock rate: %.0f MHz (%0.2f " "GHz)\n", - deviceProp.clockRate * 1e-3f, - deviceProp.clockRate * 1e-6f); - + clockRate * 1e-3f, + clockRate * 1e-6f); #if CUDART_VERSION >= 5000 - // This is supported in CUDA 5.0 (runtime API device properties) - printf(" Memory Clock rate: %.0f Mhz\n", deviceProp.memoryClockRate * 1e-3f); + int memoryClockRate; +#if CUDART_VERSION >= 13000 + checkCudaErrors(cudaDeviceGetAttribute(&memoryClockRate, cudaDevAttrMemoryClockRate, dev)); +#else + memoryClockRate = deviceProp.memoryClockRate; +#endif + printf(" Memory Clock rate: %.0f Mhz\n", memoryClockRate * 1e-3f); printf(" Memory Bus Width: %d-bit\n", deviceProp.memoryBusWidth); if (deviceProp.l2CacheSize) { @@ -194,12 +201,15 @@ int main(int argc, char **argv) deviceProp.maxGridSize[2]); printf(" Maximum memory pitch: %zu bytes\n", deviceProp.memPitch); printf(" Texture alignment: %zu bytes\n", deviceProp.textureAlignment); + int gpuOverlap; + checkCudaErrors(cudaDeviceGetAttribute(&gpuOverlap, cudaDevAttrGpuOverlap, dev)); printf(" Concurrent copy and kernel execution: %s with %d copy " "engine(s)\n", - (deviceProp.deviceOverlap ? "Yes" : "No"), + (gpuOverlap ? "Yes" : "No"), deviceProp.asyncEngineCount); - printf(" Run time limit on kernels: %s\n", - deviceProp.kernelExecTimeoutEnabled ? "Yes" : "No"); + int kernelExecTimeout; + checkCudaErrors(cudaDeviceGetAttribute(&kernelExecTimeout, cudaDevAttrKernelExecTimeout, dev)); + printf(" Run time limit on kernels: %s\n", kernelExecTimeout ? "Yes" : "No"); printf(" Integrated GPU sharing Host Memory: %s\n", deviceProp.integrated ? "Yes" : "No"); printf(" Support host page-locked memory mapping: %s\n", deviceProp.canMapHostMemory ? "Yes" : "No"); printf(" Alignment requirement for Surfaces: %s\n", deviceProp.surfaceAlignment ? "Yes" : "No"); @@ -213,8 +223,11 @@ int main(int argc, char **argv) printf(" Device supports Compute Preemption: %s\n", deviceProp.computePreemptionSupported ? "Yes" : "No"); printf(" Supports Cooperative Kernel Launch: %s\n", deviceProp.cooperativeLaunch ? "Yes" : "No"); + // The property cooperativeMultiDeviceLaunch is deprecated in CUDA 13.0 +#if CUDART_VERSION < 13000 printf(" Supports MultiDevice Co-op Kernel Launch: %s\n", deviceProp.cooperativeMultiDeviceLaunch ? "Yes" : "No"); +#endif printf(" Device PCI Domain ID / Bus ID / location ID: %d / %d / %d\n", deviceProp.pciDomainID, deviceProp.pciBusID, @@ -230,8 +243,10 @@ int main(int argc, char **argv) "::cudaSetDevice() with this device)", "Unknown", NULL}; + int computeMode; + checkCudaErrors(cudaDeviceGetAttribute(&computeMode, cudaDevAttrComputeMode, dev)); printf(" Compute Mode:\n"); - printf(" < %s >\n", sComputeMode[deviceProp.computeMode]); + printf(" < %s >\n", sComputeMode[computeMode]); } // If there are 2 or more GPUs, query to determine whether RDMA is supported diff --git a/Samples/1_Utilities/deviceQueryDrv/CMakeLists.txt b/Samples/1_Utilities/deviceQueryDrv/CMakeLists.txt index 1422633a..957b8d67 100644 --- a/Samples/1_Utilities/deviceQueryDrv/CMakeLists.txt +++ b/Samples/1_Utilities/deviceQueryDrv/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/1_Utilities/topologyQuery/CMakeLists.txt b/Samples/1_Utilities/topologyQuery/CMakeLists.txt index f5489592..d96315cb 100644 --- a/Samples/1_Utilities/topologyQuery/CMakeLists.txt +++ b/Samples/1_Utilities/topologyQuery/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/CMakeLists.txt b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/CMakeLists.txt index d07e7e06..e3c6f762 100644 --- a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/CMakeLists.txt +++ b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/cuda_consumer.cpp b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/cuda_consumer.cpp index a1bbc02a..fc1c5418 100644 --- a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/cuda_consumer.cpp +++ b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/cuda_consumer.cpp @@ -190,8 +190,9 @@ done: CUresult cudaDeviceCreateConsumer(test_cuda_consumer_s *cudaConsumer) { - CUdevice device; - CUresult status = CUDA_SUCCESS; + CUdevice device; + CUresult status = CUDA_SUCCESS; + CUctxCreateParams ctxCreateParams = {}; if (CUDA_SUCCESS != (status = cuInit(0))) { printf("Failed to initialize CUDA\n"); @@ -203,7 +204,7 @@ CUresult cudaDeviceCreateConsumer(test_cuda_consumer_s *cudaConsumer) return status; } - if (CUDA_SUCCESS != (status = cuCtxCreate(&cudaConsumer->context, 0, device))) { + if (CUDA_SUCCESS != (status = cuCtxCreate(&cudaConsumer->context, &ctxCreateParams, 0, device))) { printf("failed to create CUDA context\n"); return status; } diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/cuda_producer.cpp b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/cuda_producer.cpp index e862e541..2416bb37 100644 --- a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/cuda_producer.cpp +++ b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/cuda_producer.cpp @@ -182,8 +182,9 @@ done: CUresult cudaDeviceCreateProducer(test_cuda_producer_s *cudaProducer) { - CUdevice device; - CUresult status = CUDA_SUCCESS; + CUdevice device; + CUresult status = CUDA_SUCCESS; + CUctxCreateParams ctxCreateParams = {}; if (CUDA_SUCCESS != (status = cuInit(0))) { printf("Failed to initialize CUDA\n"); @@ -195,7 +196,7 @@ CUresult cudaDeviceCreateProducer(test_cuda_producer_s *cudaProducer) return status; } - if (CUDA_SUCCESS != (status = cuCtxCreate(&cudaProducer->context, 0, device))) { + if (CUDA_SUCCESS != (status = cuCtxCreate(&cudaProducer->context, &ctxCreateParams, 0, device))) { printf("failed to create CUDA context\n"); return status; } diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/CMakeLists.txt b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/CMakeLists.txt index 44d3ece4..4f14ce9e 100644 --- a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/CMakeLists.txt +++ b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/cuda_consumer.cpp b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/cuda_consumer.cpp index 358b8a16..08f4b8c8 100644 --- a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/cuda_consumer.cpp +++ b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/cuda_consumer.cpp @@ -302,7 +302,8 @@ CUresult cudaDeviceCreateConsumer(test_cuda_consumer_s *cudaConsumer, CUdevice d major, minor); - if (CUDA_SUCCESS != (status = cuCtxCreate(&cudaConsumer->context, 0, device))) { + CUctxCreateParams ctxCreateParams = {}; + if (CUDA_SUCCESS != (status = cuCtxCreate(&cudaConsumer->context, &ctxCreateParams, 0, device))) { printf("failed to create CUDA context\n"); return status; } diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/cuda_producer.cpp b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/cuda_producer.cpp index 6d356841..c379291f 100644 --- a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/cuda_producer.cpp +++ b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/cuda_producer.cpp @@ -316,7 +316,8 @@ CUresult cudaDeviceCreateProducer(test_cuda_producer_s *cudaProducer, CUdevice d exit(2); // EXIT_WAIVED } - if (CUDA_SUCCESS != (status = cuCtxCreate(&cudaProducer->context, 0, device))) { + CUctxCreateParams ctxCreateParams = {}; + if (CUDA_SUCCESS != (status = cuCtxCreate(&cudaProducer->context, &ctxCreateParams, 0, device))) { printf("failed to create CUDA context\n"); return status; } diff --git a/Samples/2_Concepts_and_Techniques/FunctionPointers/CMakeLists.txt b/Samples/2_Concepts_and_Techniques/FunctionPointers/CMakeLists.txt index 56431c39..5ff868cf 100644 --- a/Samples/2_Concepts_and_Techniques/FunctionPointers/CMakeLists.txt +++ b/Samples/2_Concepts_and_Techniques/FunctionPointers/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/CMakeLists.txt b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/CMakeLists.txt index f13f993b..a40b6285 100644 --- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/CMakeLists.txt +++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/CMakeLists.txt b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/CMakeLists.txt index f3738b73..36e64592 100644 --- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/CMakeLists.txt +++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/CMakeLists.txt b/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/CMakeLists.txt index 4a616160..f19fe04e 100644 --- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/CMakeLists.txt +++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/CMakeLists.txt b/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/CMakeLists.txt index 2ac32b13..22cf90cb 100644 --- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/CMakeLists.txt +++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/CMakeLists.txt b/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/CMakeLists.txt index 991b3f05..c57b6d37 100644 --- a/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/CMakeLists.txt +++ b/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/2_Concepts_and_Techniques/boxFilter/CMakeLists.txt b/Samples/2_Concepts_and_Techniques/boxFilter/CMakeLists.txt index a6420e95..0a4011f0 100644 --- a/Samples/2_Concepts_and_Techniques/boxFilter/CMakeLists.txt +++ b/Samples/2_Concepts_and_Techniques/boxFilter/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/2_Concepts_and_Techniques/convolutionSeparable/CMakeLists.txt b/Samples/2_Concepts_and_Techniques/convolutionSeparable/CMakeLists.txt index 6d5face7..e5593740 100644 --- a/Samples/2_Concepts_and_Techniques/convolutionSeparable/CMakeLists.txt +++ b/Samples/2_Concepts_and_Techniques/convolutionSeparable/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/2_Concepts_and_Techniques/convolutionTexture/CMakeLists.txt b/Samples/2_Concepts_and_Techniques/convolutionTexture/CMakeLists.txt index b5cf2494..345d2a37 100644 --- a/Samples/2_Concepts_and_Techniques/convolutionTexture/CMakeLists.txt +++ b/Samples/2_Concepts_and_Techniques/convolutionTexture/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/2_Concepts_and_Techniques/dct8x8/CMakeLists.txt b/Samples/2_Concepts_and_Techniques/dct8x8/CMakeLists.txt index 2044da7d..e2235ceb 100644 --- a/Samples/2_Concepts_and_Techniques/dct8x8/CMakeLists.txt +++ b/Samples/2_Concepts_and_Techniques/dct8x8/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/2_Concepts_and_Techniques/eigenvalues/CMakeLists.txt b/Samples/2_Concepts_and_Techniques/eigenvalues/CMakeLists.txt index b4f8b900..d19e96ff 100644 --- a/Samples/2_Concepts_and_Techniques/eigenvalues/CMakeLists.txt +++ b/Samples/2_Concepts_and_Techniques/eigenvalues/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/2_Concepts_and_Techniques/histogram/CMakeLists.txt b/Samples/2_Concepts_and_Techniques/histogram/CMakeLists.txt index 293dff7c..57e44e0e 100644 --- a/Samples/2_Concepts_and_Techniques/histogram/CMakeLists.txt +++ b/Samples/2_Concepts_and_Techniques/histogram/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/2_Concepts_and_Techniques/imageDenoising/CMakeLists.txt b/Samples/2_Concepts_and_Techniques/imageDenoising/CMakeLists.txt index 63923d77..eb0429c7 100644 --- a/Samples/2_Concepts_and_Techniques/imageDenoising/CMakeLists.txt +++ b/Samples/2_Concepts_and_Techniques/imageDenoising/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/2_Concepts_and_Techniques/inlinePTX/CMakeLists.txt b/Samples/2_Concepts_and_Techniques/inlinePTX/CMakeLists.txt index 65342964..a3e6956c 100644 --- a/Samples/2_Concepts_and_Techniques/inlinePTX/CMakeLists.txt +++ b/Samples/2_Concepts_and_Techniques/inlinePTX/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/CMakeLists.txt b/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/CMakeLists.txt index 5434fcf0..10aa4e06 100644 --- a/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/CMakeLists.txt +++ b/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/2_Concepts_and_Techniques/interval/CMakeLists.txt b/Samples/2_Concepts_and_Techniques/interval/CMakeLists.txt index 59715f12..b6440b8c 100644 --- a/Samples/2_Concepts_and_Techniques/interval/CMakeLists.txt +++ b/Samples/2_Concepts_and_Techniques/interval/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/2_Concepts_and_Techniques/particles/CMakeLists.txt b/Samples/2_Concepts_and_Techniques/particles/CMakeLists.txt index 6afbfe4a..e833507e 100644 --- a/Samples/2_Concepts_and_Techniques/particles/CMakeLists.txt +++ b/Samples/2_Concepts_and_Techniques/particles/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/2_Concepts_and_Techniques/radixSortThrust/CMakeLists.txt b/Samples/2_Concepts_and_Techniques/radixSortThrust/CMakeLists.txt index e5a68b2e..62d70eb3 100644 --- a/Samples/2_Concepts_and_Techniques/radixSortThrust/CMakeLists.txt +++ b/Samples/2_Concepts_and_Techniques/radixSortThrust/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/2_Concepts_and_Techniques/reduction/CMakeLists.txt b/Samples/2_Concepts_and_Techniques/reduction/CMakeLists.txt index 597bfe20..c5e52c52 100644 --- a/Samples/2_Concepts_and_Techniques/reduction/CMakeLists.txt +++ b/Samples/2_Concepts_and_Techniques/reduction/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/CMakeLists.txt b/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/CMakeLists.txt index e8195680..3ae42594 100644 --- a/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/CMakeLists.txt +++ b/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/2_Concepts_and_Techniques/scalarProd/CMakeLists.txt b/Samples/2_Concepts_and_Techniques/scalarProd/CMakeLists.txt index e641f84d..42757c0f 100644 --- a/Samples/2_Concepts_and_Techniques/scalarProd/CMakeLists.txt +++ b/Samples/2_Concepts_and_Techniques/scalarProd/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/2_Concepts_and_Techniques/scan/CMakeLists.txt b/Samples/2_Concepts_and_Techniques/scan/CMakeLists.txt index 61b70d9b..5d83cf09 100644 --- a/Samples/2_Concepts_and_Techniques/scan/CMakeLists.txt +++ b/Samples/2_Concepts_and_Techniques/scan/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/CMakeLists.txt b/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/CMakeLists.txt index 5c210e3d..f3a95476 100644 --- a/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/CMakeLists.txt +++ b/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/segmentationTree.cu b/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/segmentationTree.cu index 7d7c84df..b0c63605 100644 --- a/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/segmentationTree.cu +++ b/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/segmentationTree.cu @@ -69,6 +69,9 @@ #include #include +// for cuda::std::identity +#include + // Sample framework includes. #include #include @@ -680,7 +683,7 @@ private: thrust::make_counting_iterator(validEdgesCount), dEdgesFlags, dVertices_, - thrust::identity()) + cuda::std::identity()) .get(); pools.uintEdges.put(dEdgesFlags); diff --git a/Samples/2_Concepts_and_Techniques/shfl_scan/CMakeLists.txt b/Samples/2_Concepts_and_Techniques/shfl_scan/CMakeLists.txt index ad697e60..cb44e112 100644 --- a/Samples/2_Concepts_and_Techniques/shfl_scan/CMakeLists.txt +++ b/Samples/2_Concepts_and_Techniques/shfl_scan/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/2_Concepts_and_Techniques/sortingNetworks/CMakeLists.txt b/Samples/2_Concepts_and_Techniques/sortingNetworks/CMakeLists.txt index fd183a8a..52cb1244 100644 --- a/Samples/2_Concepts_and_Techniques/sortingNetworks/CMakeLists.txt +++ b/Samples/2_Concepts_and_Techniques/sortingNetworks/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/CMakeLists.txt b/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/CMakeLists.txt index ecfd0087..284d9390 100644 --- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/CMakeLists.txt +++ b/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/CMakeLists.txt b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/CMakeLists.txt index 67d30909..514c44ae 100644 --- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/CMakeLists.txt +++ b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/streamOrderedAllocationIPC.cu b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/streamOrderedAllocationIPC.cu index 3bfa9fe5..b473e8da 100644 --- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/streamOrderedAllocationIPC.cu +++ b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/streamOrderedAllocationIPC.cu @@ -322,7 +322,9 @@ static void parentProcess(char *app) } // This sample requires two processes accessing each device, so we need // to ensure exclusive or prohibited mode is not set - if (prop.computeMode != cudaComputeModeDefault) { + int computeMode; + checkCudaErrors(cudaDeviceGetAttribute(&computeMode, cudaDevAttrComputeMode, i)); + if (computeMode != cudaComputeModeDefault) { printf("Device %d is in an unsupported compute mode for this sample\n", i); continue; } diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/CMakeLists.txt b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/CMakeLists.txt index 2c265c58..220533eb 100644 --- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/CMakeLists.txt +++ b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/2_Concepts_and_Techniques/threadFenceReduction/CMakeLists.txt b/Samples/2_Concepts_and_Techniques/threadFenceReduction/CMakeLists.txt index cee0fea2..a9a08ad9 100644 --- a/Samples/2_Concepts_and_Techniques/threadFenceReduction/CMakeLists.txt +++ b/Samples/2_Concepts_and_Techniques/threadFenceReduction/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/2_Concepts_and_Techniques/threadMigration/CMakeLists.txt b/Samples/2_Concepts_and_Techniques/threadMigration/CMakeLists.txt index d88458e7..d27a8ea6 100644 --- a/Samples/2_Concepts_and_Techniques/threadMigration/CMakeLists.txt +++ b/Samples/2_Concepts_and_Techniques/threadMigration/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/2_Concepts_and_Techniques/threadMigration/threadMigration.cpp b/Samples/2_Concepts_and_Techniques/threadMigration/threadMigration.cpp index 96c280aa..f6f4cdfc 100644 --- a/Samples/2_Concepts_and_Techniques/threadMigration/threadMigration.cpp +++ b/Samples/2_Concepts_and_Techniques/threadMigration/threadMigration.cpp @@ -118,13 +118,14 @@ bool runTest(int argc, char **argv); // to be setup and the CUDA module (CUBIN) is built by NVCC static CUresult InitCUDAContext(CUDAContext *pContext, CUdevice hcuDevice, int deviceID, char **argv) { - CUcontext hcuContext = 0; - CUmodule hcuModule = 0; - CUfunction hcuFunction = 0; - CUdeviceptr dptr = 0; + CUcontext hcuContext = 0; + CUmodule hcuModule = 0; + CUfunction hcuFunction = 0; + CUdeviceptr dptr = 0; + CUctxCreateParams ctxCreateParams = {}; // cuCtxCreate: Function works on floating contexts and current context - CUresult status = cuCtxCreate(&hcuContext, 0, hcuDevice); + CUresult status = cuCtxCreate(&hcuContext, &ctxCreateParams, 0, hcuDevice); if (CUDA_SUCCESS != status) { fprintf(stderr, "cuCtxCreate for failed %d\n", deviceID, status); diff --git a/Samples/3_CUDA_Features/StreamPriorities/CMakeLists.txt b/Samples/3_CUDA_Features/StreamPriorities/CMakeLists.txt index 561a2f47..f0ddabd4 100644 --- a/Samples/3_CUDA_Features/StreamPriorities/CMakeLists.txt +++ b/Samples/3_CUDA_Features/StreamPriorities/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/3_CUDA_Features/bf16TensorCoreGemm/CMakeLists.txt b/Samples/3_CUDA_Features/bf16TensorCoreGemm/CMakeLists.txt index 76245f2d..f3fd01d7 100644 --- a/Samples/3_CUDA_Features/bf16TensorCoreGemm/CMakeLists.txt +++ b/Samples/3_CUDA_Features/bf16TensorCoreGemm/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) else() diff --git a/Samples/3_CUDA_Features/binaryPartitionCG/CMakeLists.txt b/Samples/3_CUDA_Features/binaryPartitionCG/CMakeLists.txt index 61f471ca..0454283d 100644 --- a/Samples/3_CUDA_Features/binaryPartitionCG/CMakeLists.txt +++ b/Samples/3_CUDA_Features/binaryPartitionCG/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/3_CUDA_Features/bindlessTexture/CMakeLists.txt b/Samples/3_CUDA_Features/bindlessTexture/CMakeLists.txt index 90751dff..c4166520 100644 --- a/Samples/3_CUDA_Features/bindlessTexture/CMakeLists.txt +++ b/Samples/3_CUDA_Features/bindlessTexture/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/3_CUDA_Features/cdpAdvancedQuicksort/CMakeLists.txt b/Samples/3_CUDA_Features/cdpAdvancedQuicksort/CMakeLists.txt index 597ac752..298382f5 100644 --- a/Samples/3_CUDA_Features/cdpAdvancedQuicksort/CMakeLists.txt +++ b/Samples/3_CUDA_Features/cdpAdvancedQuicksort/CMakeLists.txt @@ -11,12 +11,12 @@ set(CMAKE_POSITION_INDEPENDENT_CODE ON) if(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64") # Need to differentiate Tegra_aarch64 and other aarch64 systems(sbsa_aarch64) as they have different CUDA_ARCHITECTURES list if(${BUILD_TEGRA}) - set(CMAKE_CUDA_ARCHITECTURES 72 87 101) + set(CMAKE_CUDA_ARCHITECTURES 87 101) else() - set(CMAKE_CUDA_ARCHITECTURES 61 70 75 80 86 90) + set(CMAKE_CUDA_ARCHITECTURES 75 80 86 90) endif() else() - set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 75 80 86 89 90 100 101 120) + set(CMAKE_CUDA_ARCHITECTURES 75 80 86 89 90 100 101 120) endif() set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") diff --git a/Samples/3_CUDA_Features/cdpBezierTessellation/CMakeLists.txt b/Samples/3_CUDA_Features/cdpBezierTessellation/CMakeLists.txt index b2bcb8a7..43a6b015 100644 --- a/Samples/3_CUDA_Features/cdpBezierTessellation/CMakeLists.txt +++ b/Samples/3_CUDA_Features/cdpBezierTessellation/CMakeLists.txt @@ -11,12 +11,12 @@ set(CMAKE_POSITION_INDEPENDENT_CODE ON) if(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64") # Need to differentiate Tegra_aarch64 and other aarch64 systems(sbsa_aarch64) as they have different CUDA_ARCHITECTURES list if(${BUILD_TEGRA}) - set(CMAKE_CUDA_ARCHITECTURES 72 87 101) + set(CMAKE_CUDA_ARCHITECTURES 87 101) else() - set(CMAKE_CUDA_ARCHITECTURES 61 70 75 80 86 90) + set(CMAKE_CUDA_ARCHITECTURES 75 80 86 90) endif() else() - set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 75 80 86 89 90 100 101 120) + set(CMAKE_CUDA_ARCHITECTURES 75 80 86 89 90 100 101 120) endif() set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") diff --git a/Samples/3_CUDA_Features/cdpQuadtree/CMakeLists.txt b/Samples/3_CUDA_Features/cdpQuadtree/CMakeLists.txt index f262d70c..1fd41d0a 100644 --- a/Samples/3_CUDA_Features/cdpQuadtree/CMakeLists.txt +++ b/Samples/3_CUDA_Features/cdpQuadtree/CMakeLists.txt @@ -11,12 +11,12 @@ set(CMAKE_POSITION_INDEPENDENT_CODE ON) if(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64") # Need to differentiate Tegra_aarch64 and other aarch64 systems(sbsa_aarch64) as they have different CUDA_ARCHITECTURES list if(${BUILD_TEGRA}) - set(CMAKE_CUDA_ARCHITECTURES 72 87 101) + set(CMAKE_CUDA_ARCHITECTURES 87 101) else() - set(CMAKE_CUDA_ARCHITECTURES 61 70 75 80 86 90) + set(CMAKE_CUDA_ARCHITECTURES 75 80 86 90) endif() else() - set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 75 80 86 89 90 100 101 120) + set(CMAKE_CUDA_ARCHITECTURES 75 80 86 89 90 100 101 120) endif() set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") diff --git a/Samples/3_CUDA_Features/cdpSimplePrint/CMakeLists.txt b/Samples/3_CUDA_Features/cdpSimplePrint/CMakeLists.txt index 3732bf29..119e5ea5 100644 --- a/Samples/3_CUDA_Features/cdpSimplePrint/CMakeLists.txt +++ b/Samples/3_CUDA_Features/cdpSimplePrint/CMakeLists.txt @@ -11,12 +11,12 @@ set(CMAKE_POSITION_INDEPENDENT_CODE ON) if(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64") # Need to differentiate Tegra_aarch64 and other aarch64 systems(sbsa_aarch64) as they have different CUDA_ARCHITECTURES list if(${BUILD_TEGRA}) - set(CMAKE_CUDA_ARCHITECTURES 72 87 101) + set(CMAKE_CUDA_ARCHITECTURES 87 101) else() - set(CMAKE_CUDA_ARCHITECTURES 61 70 75 80 86 90) + set(CMAKE_CUDA_ARCHITECTURES 75 80 86 90) endif() else() - set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 75 80 86 89 90 100 101 120) + set(CMAKE_CUDA_ARCHITECTURES 75 80 86 89 90 100 101 120) endif() set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") diff --git a/Samples/3_CUDA_Features/cdpSimpleQuicksort/CMakeLists.txt b/Samples/3_CUDA_Features/cdpSimpleQuicksort/CMakeLists.txt index f616cd02..15ae4477 100644 --- a/Samples/3_CUDA_Features/cdpSimpleQuicksort/CMakeLists.txt +++ b/Samples/3_CUDA_Features/cdpSimpleQuicksort/CMakeLists.txt @@ -11,12 +11,12 @@ set(CMAKE_POSITION_INDEPENDENT_CODE ON) if(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64") # Need to differentiate Tegra_aarch64 and other aarch64 systems(sbsa_aarch64) as they have different CUDA_ARCHITECTURES list if(${BUILD_TEGRA}) - set(CMAKE_CUDA_ARCHITECTURES 72 87 101) + set(CMAKE_CUDA_ARCHITECTURES 87 101) else() - set(CMAKE_CUDA_ARCHITECTURES 61 70 75 80 86 90) + set(CMAKE_CUDA_ARCHITECTURES 75 80 86 90) endif() else() - set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 75 80 86 89 90 100 101 120) + set(CMAKE_CUDA_ARCHITECTURES 75 80 86 89 90 100 101 120) endif() set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") diff --git a/Samples/3_CUDA_Features/cudaCompressibleMemory/CMakeLists.txt b/Samples/3_CUDA_Features/cudaCompressibleMemory/CMakeLists.txt index 7313becf..c276f553 100644 --- a/Samples/3_CUDA_Features/cudaCompressibleMemory/CMakeLists.txt +++ b/Samples/3_CUDA_Features/cudaCompressibleMemory/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/3_CUDA_Features/cudaTensorCoreGemm/CMakeLists.txt b/Samples/3_CUDA_Features/cudaTensorCoreGemm/CMakeLists.txt index f6bf81e3..a66809dd 100644 --- a/Samples/3_CUDA_Features/cudaTensorCoreGemm/CMakeLists.txt +++ b/Samples/3_CUDA_Features/cudaTensorCoreGemm/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) diff --git a/Samples/3_CUDA_Features/dmmaTensorCoreGemm/CMakeLists.txt b/Samples/3_CUDA_Features/dmmaTensorCoreGemm/CMakeLists.txt index ba7d2364..0fc1fb65 100644 --- a/Samples/3_CUDA_Features/dmmaTensorCoreGemm/CMakeLists.txt +++ b/Samples/3_CUDA_Features/dmmaTensorCoreGemm/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) diff --git a/Samples/3_CUDA_Features/globalToShmemAsyncCopy/CMakeLists.txt b/Samples/3_CUDA_Features/globalToShmemAsyncCopy/CMakeLists.txt index 8f953d0d..47f20620 100644 --- a/Samples/3_CUDA_Features/globalToShmemAsyncCopy/CMakeLists.txt +++ b/Samples/3_CUDA_Features/globalToShmemAsyncCopy/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) diff --git a/Samples/3_CUDA_Features/graphConditionalNodes/CMakeLists.txt b/Samples/3_CUDA_Features/graphConditionalNodes/CMakeLists.txt index 9b623c4e..952f07d0 100644 --- a/Samples/3_CUDA_Features/graphConditionalNodes/CMakeLists.txt +++ b/Samples/3_CUDA_Features/graphConditionalNodes/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/3_CUDA_Features/graphConditionalNodes/graphConditionalNodes.cu b/Samples/3_CUDA_Features/graphConditionalNodes/graphConditionalNodes.cu index 5fbdbfbe..22ade6a7 100644 --- a/Samples/3_CUDA_Features/graphConditionalNodes/graphConditionalNodes.cu +++ b/Samples/3_CUDA_Features/graphConditionalNodes/graphConditionalNodes.cu @@ -97,13 +97,13 @@ void simpleIfGraph(void) params.kernel.kernelParams = kernelArgs; kernelArgs[0] = &dPtr; kernelArgs[1] = &handle; - checkCudaErrors(cudaGraphAddNode(&kernelNode, graph, NULL, 0, ¶ms)); + checkCudaErrors(cudaGraphAddNode(&kernelNode, graph, NULL, NULL, 0, ¶ms)); cudaGraphNodeParams cParams = {cudaGraphNodeTypeConditional}; cParams.conditional.handle = handle; cParams.conditional.type = cudaGraphCondTypeIf; cParams.conditional.size = 1; - checkCudaErrors(cudaGraphAddNode(&conditionalNode, graph, &kernelNode, 1, &cParams)); + checkCudaErrors(cudaGraphAddNode(&conditionalNode, graph, &kernelNode, NULL, 0, &cParams)); cudaGraph_t bodyGraph = cParams.conditional.phGraph_out[0]; @@ -111,7 +111,7 @@ void simpleIfGraph(void) cudaGraphNode_t bodyNode; params.kernel.func = (void *)ifGraphKernelC; params.kernel.kernelParams = nullptr; - checkCudaErrors(cudaGraphAddNode(&bodyNode, bodyGraph, NULL, 0, ¶ms)); + checkCudaErrors(cudaGraphAddNode(&bodyNode, bodyGraph, NULL, NULL, 0, ¶ms)); checkCudaErrors(cudaGraphInstantiate(&graphExec, graph, NULL, NULL, 0)); @@ -182,7 +182,7 @@ void simpleDoWhileGraph(void) cParams.conditional.handle = handle; cParams.conditional.type = cudaGraphCondTypeWhile; cParams.conditional.size = 1; - checkCudaErrors(cudaGraphAddNode(&conditionalNode, graph, NULL, 0, &cParams)); + checkCudaErrors(cudaGraphAddNode(&conditionalNode, graph, NULL, NULL, 0, &cParams)); cudaGraph_t bodyGraph = cParams.conditional.phGraph_out[0]; @@ -267,7 +267,8 @@ void capturedWhileGraph(void) checkCudaErrors(cudaStreamBeginCapture(captureStream, cudaStreamCaptureModeGlobal)); // Obtain the handle of the graph - checkCudaErrors(cudaStreamGetCaptureInfo(captureStream, &status, NULL, &graph, &dependencies, &numDependencies)); + checkCudaErrors( + cudaStreamGetCaptureInfo(captureStream, &status, NULL, &graph, &dependencies, NULL, &numDependencies)); // Create the conditional handle cudaGraphConditionalHandle handle; @@ -277,7 +278,8 @@ void capturedWhileGraph(void) capturedWhileKernel<<<1, 1, 0, captureStream>>>(dPtr, handle); // Obtain the handle for node A - checkCudaErrors(cudaStreamGetCaptureInfo(captureStream, &status, NULL, &graph, &dependencies, &numDependencies)); + checkCudaErrors( + cudaStreamGetCaptureInfo(captureStream, &status, NULL, &graph, &dependencies, NULL, &numDependencies)); // Insert conditional node B cudaGraphNode_t conditionalNode; @@ -285,13 +287,13 @@ void capturedWhileGraph(void) cParams.conditional.handle = handle; cParams.conditional.type = cudaGraphCondTypeWhile; cParams.conditional.size = 1; - checkCudaErrors(cudaGraphAddNode(&conditionalNode, graph, dependencies, numDependencies, &cParams)); + checkCudaErrors(cudaGraphAddNode(&conditionalNode, graph, dependencies, NULL, numDependencies, &cParams)); cudaGraph_t bodyGraph = cParams.conditional.phGraph_out[0]; // Update stream capture dependencies to account for the node we manually added - checkCudaErrors( - cudaStreamUpdateCaptureDependencies(captureStream, &conditionalNode, 1, cudaStreamSetCaptureDependencies)); + checkCudaErrors(cudaStreamUpdateCaptureDependencies( + captureStream, &conditionalNode, NULL, 1, cudaStreamSetCaptureDependencies)); // Insert kernel node D capturedWhileEmptyKernel<<<1, 1, 0, captureStream>>>(); @@ -380,13 +382,13 @@ void simpleIfElseGraph(void) params.kernel.kernelParams = kernelArgs; kernelArgs[0] = &dPtr; kernelArgs[1] = &handle; - checkCudaErrors(cudaGraphAddNode(&kernelNode, graph, NULL, 0, ¶ms)); + checkCudaErrors(cudaGraphAddNode(&kernelNode, graph, NULL, NULL, 0, ¶ms)); cudaGraphNodeParams cParams = {cudaGraphNodeTypeConditional}; cParams.conditional.handle = handle; cParams.conditional.type = cudaGraphCondTypeIf; cParams.conditional.size = 2; // Set size to 2 to indicate an ELSE graph will be used - checkCudaErrors(cudaGraphAddNode(&conditionalNode, graph, &kernelNode, 1, &cParams)); + checkCudaErrors(cudaGraphAddNode(&conditionalNode, graph, &kernelNode, NULL, 0, &cParams)); cudaGraph_t bodyGraph = cParams.conditional.phGraph_out[0]; @@ -394,7 +396,7 @@ void simpleIfElseGraph(void) cudaGraphNode_t trueBodyNode; params.kernel.func = (void *)ifGraphKernelC; params.kernel.kernelParams = nullptr; - checkCudaErrors(cudaGraphAddNode(&trueBodyNode, bodyGraph, NULL, 0, ¶ms)); + checkCudaErrors(cudaGraphAddNode(&trueBodyNode, bodyGraph, NULL, NULL, 0, ¶ms)); // Populate the body of the second graph in the conditional node, executed if the condition is false bodyGraph = cParams.conditional.phGraph_out[1]; @@ -402,7 +404,7 @@ void simpleIfElseGraph(void) cudaGraphNode_t falseBodyNode; params.kernel.func = (void *)ifGraphKernelD; params.kernel.kernelParams = nullptr; - checkCudaErrors(cudaGraphAddNode(&falseBodyNode, bodyGraph, NULL, 0, ¶ms)); + checkCudaErrors(cudaGraphAddNode(&falseBodyNode, bodyGraph, NULL, NULL, 0, ¶ms)); checkCudaErrors(cudaGraphInstantiate(&graphExec, graph, NULL, NULL, 0)); @@ -484,25 +486,25 @@ void simpleSwitchGraph(void) params.kernel.kernelParams = kernelArgs; kernelArgs[0] = &dPtr; kernelArgs[1] = &handle; - checkCudaErrors(cudaGraphAddNode(&kernelNode, graph, NULL, 0, ¶ms)); + checkCudaErrors(cudaGraphAddNode(&kernelNode, graph, NULL, NULL, 0, ¶ms)); cudaGraphNodeParams cParams = {cudaGraphNodeTypeConditional}; cParams.conditional.handle = handle; cParams.conditional.type = cudaGraphCondTypeSwitch; cParams.conditional.size = 4; - checkCudaErrors(cudaGraphAddNode(&conditionalNode, graph, &kernelNode, 1, &cParams)); + checkCudaErrors(cudaGraphAddNode(&conditionalNode, graph, &kernelNode, NULL, 0, &cParams)); // Populate the four graph bodies within the SWITCH conditional graph cudaGraphNode_t bodyNode; params.kernel.kernelParams = nullptr; params.kernel.func = (void *)switchGraphKernelC; - checkCudaErrors(cudaGraphAddNode(&bodyNode, cParams.conditional.phGraph_out[0], NULL, 0, ¶ms)); + checkCudaErrors(cudaGraphAddNode(&bodyNode, cParams.conditional.phGraph_out[0], NULL, NULL, 0, ¶ms)); params.kernel.func = (void *)switchGraphKernelD; - checkCudaErrors(cudaGraphAddNode(&bodyNode, cParams.conditional.phGraph_out[1], NULL, 0, ¶ms)); + checkCudaErrors(cudaGraphAddNode(&bodyNode, cParams.conditional.phGraph_out[1], NULL, NULL, 0, ¶ms)); params.kernel.func = (void *)switchGraphKernelE; - checkCudaErrors(cudaGraphAddNode(&bodyNode, cParams.conditional.phGraph_out[2], NULL, 0, ¶ms)); + checkCudaErrors(cudaGraphAddNode(&bodyNode, cParams.conditional.phGraph_out[2], NULL, NULL, 0, ¶ms)); params.kernel.func = (void *)switchGraphKernelF; - checkCudaErrors(cudaGraphAddNode(&bodyNode, cParams.conditional.phGraph_out[3], NULL, 0, ¶ms)); + checkCudaErrors(cudaGraphAddNode(&bodyNode, cParams.conditional.phGraph_out[3], NULL, NULL, 0, ¶ms)); checkCudaErrors(cudaGraphInstantiate(&graphExec, graph, NULL, NULL, 0)); diff --git a/Samples/3_CUDA_Features/graphMemoryFootprint/CMakeLists.txt b/Samples/3_CUDA_Features/graphMemoryFootprint/CMakeLists.txt index 8873b594..96a0282d 100644 --- a/Samples/3_CUDA_Features/graphMemoryFootprint/CMakeLists.txt +++ b/Samples/3_CUDA_Features/graphMemoryFootprint/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/3_CUDA_Features/graphMemoryFootprint/graphMemoryFootprint.cu b/Samples/3_CUDA_Features/graphMemoryFootprint/graphMemoryFootprint.cu index 84fa45f1..8ca79852 100644 --- a/Samples/3_CUDA_Features/graphMemoryFootprint/graphMemoryFootprint.cu +++ b/Samples/3_CUDA_Features/graphMemoryFootprint/graphMemoryFootprint.cu @@ -149,9 +149,9 @@ void createSimpleAllocFreeGraph(cudaGraphExec_t *graphExec, float **dPtr, size_t checkCudaErrors(cudaGraphAddMemAllocNode(&allocNodeA, graph, NULL, 0, &allocParams)); *dPtr = (float *)allocParams.dptr; - cudaDeviceProp deviceProp; - checkCudaErrors(cudaGetDeviceProperties(&deviceProp, device)); - clock_t time_clocks = (clock_t)((kernelTime / 1000.0) * deviceProp.clockRate); + int clockRate; + checkCudaErrors(cudaDeviceGetAttribute(&clockRate, cudaDevAttrClockRate, device)); + clock_t time_clocks = (clock_t)((kernelTime / 1000.0) * clockRate); void *blockDeviceArgs[1] = {(void *)&time_clocks}; diff --git a/Samples/3_CUDA_Features/graphMemoryNodes/CMakeLists.txt b/Samples/3_CUDA_Features/graphMemoryNodes/CMakeLists.txt index 30040614..48a0ecda 100644 --- a/Samples/3_CUDA_Features/graphMemoryNodes/CMakeLists.txt +++ b/Samples/3_CUDA_Features/graphMemoryNodes/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/3_CUDA_Features/immaTensorCoreGemm/CMakeLists.txt b/Samples/3_CUDA_Features/immaTensorCoreGemm/CMakeLists.txt index dce21aa8..cae635da 100644 --- a/Samples/3_CUDA_Features/immaTensorCoreGemm/CMakeLists.txt +++ b/Samples/3_CUDA_Features/immaTensorCoreGemm/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) diff --git a/Samples/3_CUDA_Features/jacobiCudaGraphs/CMakeLists.txt b/Samples/3_CUDA_Features/jacobiCudaGraphs/CMakeLists.txt index 0ed1f98b..b05a3179 100644 --- a/Samples/3_CUDA_Features/jacobiCudaGraphs/CMakeLists.txt +++ b/Samples/3_CUDA_Features/jacobiCudaGraphs/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/3_CUDA_Features/memMapIPCDrv/CMakeLists.txt b/Samples/3_CUDA_Features/memMapIPCDrv/CMakeLists.txt index c3a04081..cedb6527 100644 --- a/Samples/3_CUDA_Features/memMapIPCDrv/CMakeLists.txt +++ b/Samples/3_CUDA_Features/memMapIPCDrv/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/3_CUDA_Features/memMapIPCDrv/memMapIpc.cpp b/Samples/3_CUDA_Features/memMapIPCDrv/memMapIpc.cpp index 0fe208d2..09937acc 100644 --- a/Samples/3_CUDA_Features/memMapIPCDrv/memMapIpc.cpp +++ b/Samples/3_CUDA_Features/memMapIPCDrv/memMapIpc.cpp @@ -340,13 +340,14 @@ static void childProcess(int devId, int id, char **argv) std::vector shHandle(procCount); checkIpcErrors(ipcRecvShareableHandles(ipcChildHandle, shHandle)); - CUcontext ctx; - CUdevice device; - CUstream stream; - int multiProcessorCount; + CUcontext ctx; + CUdevice device; + CUstream stream; + int multiProcessorCount; + CUctxCreateParams ctx_params = {}; checkCudaErrors(cuDeviceGet(&device, devId)); - checkCudaErrors(cuCtxCreate(&ctx, 0, device)); + checkCudaErrors(cuCtxCreate(&ctx, &ctx_params, 0, device)); checkCudaErrors(cuStreamCreate(&stream, CU_STREAM_NON_BLOCKING)); // Obtain kernel function for the sample @@ -518,8 +519,9 @@ static void parentProcess(char *app) } } if (allPeers) { - CUcontext ctx; - checkCudaErrors(cuCtxCreate(&ctx, 0, devices[i])); + CUcontext ctx; + CUctxCreateParams ctx_params = {}; + checkCudaErrors(cuCtxCreate(&ctx, &ctx_params, 0, devices[i])); ctxs.push_back(ctx); // Enable peers here. This isn't necessary for IPC, but it will diff --git a/Samples/3_CUDA_Features/newdelete/CMakeLists.txt b/Samples/3_CUDA_Features/newdelete/CMakeLists.txt index b293fe4a..6abf984b 100644 --- a/Samples/3_CUDA_Features/newdelete/CMakeLists.txt +++ b/Samples/3_CUDA_Features/newdelete/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/3_CUDA_Features/ptxjit/CMakeLists.txt b/Samples/3_CUDA_Features/ptxjit/CMakeLists.txt index d1c27fb9..df9159fa 100644 --- a/Samples/3_CUDA_Features/ptxjit/CMakeLists.txt +++ b/Samples/3_CUDA_Features/ptxjit/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/3_CUDA_Features/simpleCudaGraphs/CMakeLists.txt b/Samples/3_CUDA_Features/simpleCudaGraphs/CMakeLists.txt index 13635969..6c9d6652 100644 --- a/Samples/3_CUDA_Features/simpleCudaGraphs/CMakeLists.txt +++ b/Samples/3_CUDA_Features/simpleCudaGraphs/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/3_CUDA_Features/tf32TensorCoreGemm/CMakeLists.txt b/Samples/3_CUDA_Features/tf32TensorCoreGemm/CMakeLists.txt index 3b793770..0029f143 100644 --- a/Samples/3_CUDA_Features/tf32TensorCoreGemm/CMakeLists.txt +++ b/Samples/3_CUDA_Features/tf32TensorCoreGemm/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) diff --git a/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/CMakeLists.txt b/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/CMakeLists.txt index 1cfb090e..b9941d23 100644 --- a/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/CMakeLists.txt +++ b/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/CMakeLists.txt b/Samples/4_CUDA_Libraries/FilterBorderControlNPP/CMakeLists.txt index ba7bd2f3..8c1cb412 100644 --- a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/CMakeLists.txt +++ b/Samples/4_CUDA_Libraries/FilterBorderControlNPP/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/CMakeLists.txt b/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/CMakeLists.txt index f0fd8045..710cd808 100644 --- a/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/CMakeLists.txt +++ b/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/4_CUDA_Libraries/batchCUBLAS/CMakeLists.txt b/Samples/4_CUDA_Libraries/batchCUBLAS/CMakeLists.txt index bc4e9c46..339d9f84 100644 --- a/Samples/4_CUDA_Libraries/batchCUBLAS/CMakeLists.txt +++ b/Samples/4_CUDA_Libraries/batchCUBLAS/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/4_CUDA_Libraries/boxFilterNPP/CMakeLists.txt b/Samples/4_CUDA_Libraries/boxFilterNPP/CMakeLists.txt index 5fd6ef98..c8074aab 100644 --- a/Samples/4_CUDA_Libraries/boxFilterNPP/CMakeLists.txt +++ b/Samples/4_CUDA_Libraries/boxFilterNPP/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/CMakeLists.txt b/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/CMakeLists.txt index eb309ad4..a99f85d9 100644 --- a/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/CMakeLists.txt +++ b/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/4_CUDA_Libraries/conjugateGradient/CMakeLists.txt b/Samples/4_CUDA_Libraries/conjugateGradient/CMakeLists.txt index 65b0ff35..403d0dd3 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradient/CMakeLists.txt +++ b/Samples/4_CUDA_Libraries/conjugateGradient/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/CMakeLists.txt b/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/CMakeLists.txt index 20c1783a..99e9f156 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/CMakeLists.txt +++ b/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/CMakeLists.txt b/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/CMakeLists.txt index 6bbdd09a..05f6b164 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/CMakeLists.txt +++ b/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/CMakeLists.txt b/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/CMakeLists.txt index 775ee452..38541fde 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/CMakeLists.txt +++ b/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG.cu b/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG.cu index 3b20c063..6344d5e7 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG.cu +++ b/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG.cu @@ -585,9 +585,12 @@ int main(int argc, char **argv) genTridiag(I, J, val_cpu, N, nz); memcpy(val, val_cpu, sizeof(float) * nz); - checkCudaErrors(cudaMemAdvise(I, sizeof(int) * (N + 1), cudaMemAdviseSetReadMostly, 0)); - checkCudaErrors(cudaMemAdvise(J, sizeof(int) * nz, cudaMemAdviseSetReadMostly, 0)); - checkCudaErrors(cudaMemAdvise(val, sizeof(float) * nz, cudaMemAdviseSetReadMostly, 0)); + cudaMemLocation deviceLoc; + deviceLoc.type = cudaMemLocationTypeDevice; + deviceLoc.id = 0; // Device location with initial device 0 + checkCudaErrors(cudaMemAdvise(I, sizeof(int) * (N + 1), cudaMemAdviseSetReadMostly, deviceLoc)); + checkCudaErrors(cudaMemAdvise(J, sizeof(int) * nz, cudaMemAdviseSetReadMostly, deviceLoc)); + checkCudaErrors(cudaMemAdvise(val, sizeof(float) * nz, cudaMemAdviseSetReadMostly, deviceLoc)); checkCudaErrors(cudaMallocManaged((void **)&x, sizeof(float) * N)); @@ -648,26 +651,30 @@ int main(int argc, char **argv) int offset_p = device_count * totalThreadsPerGPU; int offset_x = device_count * totalThreadsPerGPU; - checkCudaErrors(cudaMemPrefetchAsync(I, sizeof(int) * N, *deviceId, nStreams[device_count])); - checkCudaErrors(cudaMemPrefetchAsync(val, sizeof(float) * nz, *deviceId, nStreams[device_count])); - checkCudaErrors(cudaMemPrefetchAsync(J, sizeof(float) * nz, *deviceId, nStreams[device_count])); + // Create device location with specific device ID + cudaMemLocation deviceLoc; + deviceLoc.type = cudaMemLocationTypeDevice; + deviceLoc.id = *deviceId; + checkCudaErrors(cudaMemPrefetchAsync(I, sizeof(int) * N, deviceLoc, 0, nStreams[device_count])); + checkCudaErrors(cudaMemPrefetchAsync(val, sizeof(float) * nz, deviceLoc, 0, nStreams[device_count])); + checkCudaErrors(cudaMemPrefetchAsync(J, sizeof(float) * nz, deviceLoc, 0, nStreams[device_count])); if (offset_Ax <= N) { for (int i = 0; i < perGPUIter; i++) { cudaMemAdvise( - Ax + offset_Ax, sizeof(float) * totalThreadsPerGPU, cudaMemAdviseSetPreferredLocation, *deviceId); + Ax + offset_Ax, sizeof(float) * totalThreadsPerGPU, cudaMemAdviseSetPreferredLocation, deviceLoc); cudaMemAdvise( - r + offset_r, sizeof(float) * totalThreadsPerGPU, cudaMemAdviseSetPreferredLocation, *deviceId); + r + offset_r, sizeof(float) * totalThreadsPerGPU, cudaMemAdviseSetPreferredLocation, deviceLoc); cudaMemAdvise( - x + offset_x, sizeof(float) * totalThreadsPerGPU, cudaMemAdviseSetPreferredLocation, *deviceId); + x + offset_x, sizeof(float) * totalThreadsPerGPU, cudaMemAdviseSetPreferredLocation, deviceLoc); cudaMemAdvise( - p + offset_p, sizeof(float) * totalThreadsPerGPU, cudaMemAdviseSetPreferredLocation, *deviceId); + p + offset_p, sizeof(float) * totalThreadsPerGPU, cudaMemAdviseSetPreferredLocation, deviceLoc); cudaMemAdvise( - Ax + offset_Ax, sizeof(float) * totalThreadsPerGPU, cudaMemAdviseSetAccessedBy, *deviceId); - cudaMemAdvise(r + offset_r, sizeof(float) * totalThreadsPerGPU, cudaMemAdviseSetAccessedBy, *deviceId); - cudaMemAdvise(p + offset_p, sizeof(float) * totalThreadsPerGPU, cudaMemAdviseSetAccessedBy, *deviceId); - cudaMemAdvise(x + offset_x, sizeof(float) * totalThreadsPerGPU, cudaMemAdviseSetAccessedBy, *deviceId); + Ax + offset_Ax, sizeof(float) * totalThreadsPerGPU, cudaMemAdviseSetAccessedBy, deviceLoc); + cudaMemAdvise(r + offset_r, sizeof(float) * totalThreadsPerGPU, cudaMemAdviseSetAccessedBy, deviceLoc); + cudaMemAdvise(p + offset_p, sizeof(float) * totalThreadsPerGPU, cudaMemAdviseSetAccessedBy, deviceLoc); + cudaMemAdvise(x + offset_x, sizeof(float) * totalThreadsPerGPU, cudaMemAdviseSetAccessedBy, deviceLoc); offset_Ax += totalThreadsPerGPU * kNumGpusRequired; offset_r += totalThreadsPerGPU * kNumGpusRequired; @@ -739,8 +746,11 @@ int main(int argc, char **argv) deviceId++; } - checkCudaErrors(cudaMemPrefetchAsync(x, sizeof(float) * N, cudaCpuDeviceId)); - checkCudaErrors(cudaMemPrefetchAsync(dot_result, sizeof(double), cudaCpuDeviceId)); + // Use cudaMemLocationTypeHost for optimal host memory location + cudaMemLocation hostLoc; + hostLoc.type = cudaMemLocationTypeHost; + checkCudaErrors(cudaMemPrefetchAsync(x, sizeof(float) * N, hostLoc, 0)); + checkCudaErrors(cudaMemPrefetchAsync(dot_result, sizeof(double), hostLoc, 0)); deviceId = bestFitDeviceIds.begin(); device_count = 0; diff --git a/Samples/4_CUDA_Libraries/conjugateGradientPrecond/CMakeLists.txt b/Samples/4_CUDA_Libraries/conjugateGradientPrecond/CMakeLists.txt index 455dd818..29dd2709 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientPrecond/CMakeLists.txt +++ b/Samples/4_CUDA_Libraries/conjugateGradientPrecond/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/4_CUDA_Libraries/conjugateGradientUM/CMakeLists.txt b/Samples/4_CUDA_Libraries/conjugateGradientUM/CMakeLists.txt index 492b4f24..d3ead386 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientUM/CMakeLists.txt +++ b/Samples/4_CUDA_Libraries/conjugateGradientUM/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/CMakeLists.txt b/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/CMakeLists.txt index 8865f7ef..8fae158b 100644 --- a/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/CMakeLists.txt +++ b/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/4_CUDA_Libraries/cuSolverRf/CMakeLists.txt b/Samples/4_CUDA_Libraries/cuSolverRf/CMakeLists.txt index 20f57fd6..3bb519ae 100644 --- a/Samples/4_CUDA_Libraries/cuSolverRf/CMakeLists.txt +++ b/Samples/4_CUDA_Libraries/cuSolverRf/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/CMakeLists.txt b/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/CMakeLists.txt index 92d273ce..210c71cc 100644 --- a/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/CMakeLists.txt +++ b/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/CMakeLists.txt b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/CMakeLists.txt index 351a5175..9dd69f65 100644 --- a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/CMakeLists.txt +++ b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/CMakeLists.txt b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/CMakeLists.txt index fe36e358..f813b74a 100644 --- a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/CMakeLists.txt +++ b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/4_CUDA_Libraries/cudaNvSci/CMakeLists.txt b/Samples/4_CUDA_Libraries/cudaNvSci/CMakeLists.txt index 0e25a301..91b19005 100644 --- a/Samples/4_CUDA_Libraries/cudaNvSci/CMakeLists.txt +++ b/Samples/4_CUDA_Libraries/cudaNvSci/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 53 61 70 72 75 80 86 87 90) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 90) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) diff --git a/Samples/4_CUDA_Libraries/freeImageInteropNPP/CMakeLists.txt b/Samples/4_CUDA_Libraries/freeImageInteropNPP/CMakeLists.txt index 4f04ace7..4b4cfd4c 100644 --- a/Samples/4_CUDA_Libraries/freeImageInteropNPP/CMakeLists.txt +++ b/Samples/4_CUDA_Libraries/freeImageInteropNPP/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/4_CUDA_Libraries/histEqualizationNPP/CMakeLists.txt b/Samples/4_CUDA_Libraries/histEqualizationNPP/CMakeLists.txt index bd2b9b5d..d6736fd4 100644 --- a/Samples/4_CUDA_Libraries/histEqualizationNPP/CMakeLists.txt +++ b/Samples/4_CUDA_Libraries/histEqualizationNPP/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/4_CUDA_Libraries/jitLto/CMakeLists.txt b/Samples/4_CUDA_Libraries/jitLto/CMakeLists.txt index 428edd44..2f21aaaa 100644 --- a/Samples/4_CUDA_Libraries/jitLto/CMakeLists.txt +++ b/Samples/4_CUDA_Libraries/jitLto/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/4_CUDA_Libraries/jitLto/jitLto.cpp b/Samples/4_CUDA_Libraries/jitLto/jitLto.cpp index 0bf1d732..0a57dfee 100644 --- a/Samples/4_CUDA_Libraries/jitLto/jitLto.cpp +++ b/Samples/4_CUDA_Libraries/jitLto/jitLto.cpp @@ -146,13 +146,14 @@ int main(int argc, char *argv[]) getLTOIR(lto_saxpy, "lto_saxpy.cu", <oIR1, <oIR1Size); getLTOIR(lto_compute, "lto_compute.cu", <oIR2, <oIR2Size); - CUdevice cuDevice; - CUcontext context; - CUmodule module; - CUfunction kernel; + CUdevice cuDevice; + CUcontext context; + CUmodule module; + CUfunction kernel; + CUctxCreateParams ctxCreateParams = {}; CUDA_SAFE_CALL(cuInit(0)); CUDA_SAFE_CALL(cuDeviceGet(&cuDevice, 0)); - CUDA_SAFE_CALL(cuCtxCreate(&context, 0, cuDevice)); + CUDA_SAFE_CALL(cuCtxCreate(&context, &ctxCreateParams, 0, cuDevice)); // Dynamically determine the arch to link for int major = 0; diff --git a/Samples/4_CUDA_Libraries/lineOfSight/CMakeLists.txt b/Samples/4_CUDA_Libraries/lineOfSight/CMakeLists.txt index e3fab2d0..0f763510 100644 --- a/Samples/4_CUDA_Libraries/lineOfSight/CMakeLists.txt +++ b/Samples/4_CUDA_Libraries/lineOfSight/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/4_CUDA_Libraries/matrixMulCUBLAS/CMakeLists.txt b/Samples/4_CUDA_Libraries/matrixMulCUBLAS/CMakeLists.txt index a04c2726..bd970243 100644 --- a/Samples/4_CUDA_Libraries/matrixMulCUBLAS/CMakeLists.txt +++ b/Samples/4_CUDA_Libraries/matrixMulCUBLAS/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/4_CUDA_Libraries/nvJPEG/CMakeLists.txt b/Samples/4_CUDA_Libraries/nvJPEG/CMakeLists.txt index 244c8881..7bd35d14 100644 --- a/Samples/4_CUDA_Libraries/nvJPEG/CMakeLists.txt +++ b/Samples/4_CUDA_Libraries/nvJPEG/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/4_CUDA_Libraries/nvJPEG_encoder/CMakeLists.txt b/Samples/4_CUDA_Libraries/nvJPEG_encoder/CMakeLists.txt index 417b5bfc..d372b30a 100644 --- a/Samples/4_CUDA_Libraries/nvJPEG_encoder/CMakeLists.txt +++ b/Samples/4_CUDA_Libraries/nvJPEG_encoder/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/4_CUDA_Libraries/oceanFFT/CMakeLists.txt b/Samples/4_CUDA_Libraries/oceanFFT/CMakeLists.txt index f79f5741..e2edc4c9 100644 --- a/Samples/4_CUDA_Libraries/oceanFFT/CMakeLists.txt +++ b/Samples/4_CUDA_Libraries/oceanFFT/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/4_CUDA_Libraries/randomFog/CMakeLists.txt b/Samples/4_CUDA_Libraries/randomFog/CMakeLists.txt index cf6e76da..fe004531 100644 --- a/Samples/4_CUDA_Libraries/randomFog/CMakeLists.txt +++ b/Samples/4_CUDA_Libraries/randomFog/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/4_CUDA_Libraries/simpleCUBLAS/CMakeLists.txt b/Samples/4_CUDA_Libraries/simpleCUBLAS/CMakeLists.txt index 57e7cc17..0d21e462 100644 --- a/Samples/4_CUDA_Libraries/simpleCUBLAS/CMakeLists.txt +++ b/Samples/4_CUDA_Libraries/simpleCUBLAS/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/4_CUDA_Libraries/simpleCUBLASXT/CMakeLists.txt b/Samples/4_CUDA_Libraries/simpleCUBLASXT/CMakeLists.txt index bdbc352a..0650910b 100644 --- a/Samples/4_CUDA_Libraries/simpleCUBLASXT/CMakeLists.txt +++ b/Samples/4_CUDA_Libraries/simpleCUBLASXT/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/4_CUDA_Libraries/simpleCUBLASXT/simpleCUBLASXT.cpp b/Samples/4_CUDA_Libraries/simpleCUBLASXT/simpleCUBLASXT.cpp index 44f85de9..b100f1ae 100644 --- a/Samples/4_CUDA_Libraries/simpleCUBLASXT/simpleCUBLASXT.cpp +++ b/Samples/4_CUDA_Libraries/simpleCUBLASXT/simpleCUBLASXT.cpp @@ -84,13 +84,17 @@ void findMultipleBestGPUs(int &num_of_devices, int *device_ids) cudaDeviceProp deviceProp; int devices_prohibited = 0; + int computeMode; + int clockRate; + while (current_device < device_count) { cudaGetDeviceProperties(&deviceProp, current_device); - + checkCudaErrors(cudaDeviceGetAttribute(&computeMode, cudaDevAttrComputeMode, current_device)); + checkCudaErrors(cudaDeviceGetAttribute(&clockRate, cudaDevAttrClockRate, current_device)); // If this GPU is not running on Compute Mode prohibited, // then we can add it to the list int sm_per_multiproc; - if (deviceProp.computeMode != cudaComputeModeProhibited) { + if (computeMode != cudaComputeModeProhibited) { if (deviceProp.major == 9999 && deviceProp.minor == 9999) { sm_per_multiproc = 1; } @@ -99,7 +103,7 @@ void findMultipleBestGPUs(int &num_of_devices, int *device_ids) } gpu_stats[current_device].compute_perf = - (uint64_t)deviceProp.multiProcessorCount * sm_per_multiproc * deviceProp.clockRate; + (uint64_t)deviceProp.multiProcessorCount * sm_per_multiproc * clockRate; gpu_stats[current_device].device_id = current_device; } else { diff --git a/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/CMakeLists.txt b/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/CMakeLists.txt index 64d76ff5..f13b9c8f 100644 --- a/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/CMakeLists.txt +++ b/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT/CMakeLists.txt b/Samples/4_CUDA_Libraries/simpleCUFFT/CMakeLists.txt index 8f70e5d2..6e0f0499 100644 --- a/Samples/4_CUDA_Libraries/simpleCUFFT/CMakeLists.txt +++ b/Samples/4_CUDA_Libraries/simpleCUFFT/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/CMakeLists.txt b/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/CMakeLists.txt index 3784fcb7..464740fd 100644 --- a/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/CMakeLists.txt +++ b/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/CMakeLists.txt b/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/CMakeLists.txt index 5da52a4d..3fbad6e0 100644 --- a/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/CMakeLists.txt +++ b/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_callback/CMakeLists.txt b/Samples/4_CUDA_Libraries/simpleCUFFT_callback/CMakeLists.txt index 521f252f..90c88cd5 100644 --- a/Samples/4_CUDA_Libraries/simpleCUFFT_callback/CMakeLists.txt +++ b/Samples/4_CUDA_Libraries/simpleCUFFT_callback/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/4_CUDA_Libraries/watershedSegmentationNPP/CMakeLists.txt b/Samples/4_CUDA_Libraries/watershedSegmentationNPP/CMakeLists.txt index 74d99dbf..1d84ff72 100644 --- a/Samples/4_CUDA_Libraries/watershedSegmentationNPP/CMakeLists.txt +++ b/Samples/4_CUDA_Libraries/watershedSegmentationNPP/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/5_Domain_Specific/BlackScholes/CMakeLists.txt b/Samples/5_Domain_Specific/BlackScholes/CMakeLists.txt index 88b7b3c2..cd130023 100644 --- a/Samples/5_Domain_Specific/BlackScholes/CMakeLists.txt +++ b/Samples/5_Domain_Specific/BlackScholes/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/5_Domain_Specific/BlackScholes_nvrtc/CMakeLists.txt b/Samples/5_Domain_Specific/BlackScholes_nvrtc/CMakeLists.txt index 8684a4e9..623478fb 100644 --- a/Samples/5_Domain_Specific/BlackScholes_nvrtc/CMakeLists.txt +++ b/Samples/5_Domain_Specific/BlackScholes_nvrtc/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/5_Domain_Specific/FDTD3d/CMakeLists.txt b/Samples/5_Domain_Specific/FDTD3d/CMakeLists.txt index 8defd030..315319f2 100644 --- a/Samples/5_Domain_Specific/FDTD3d/CMakeLists.txt +++ b/Samples/5_Domain_Specific/FDTD3d/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/5_Domain_Specific/HSOpticalFlow/CMakeLists.txt b/Samples/5_Domain_Specific/HSOpticalFlow/CMakeLists.txt index c8198113..599280c8 100644 --- a/Samples/5_Domain_Specific/HSOpticalFlow/CMakeLists.txt +++ b/Samples/5_Domain_Specific/HSOpticalFlow/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/5_Domain_Specific/Mandelbrot/CMakeLists.txt b/Samples/5_Domain_Specific/Mandelbrot/CMakeLists.txt index af4c103f..1b35ed33 100644 --- a/Samples/5_Domain_Specific/Mandelbrot/CMakeLists.txt +++ b/Samples/5_Domain_Specific/Mandelbrot/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/5_Domain_Specific/MonteCarloMultiGPU/CMakeLists.txt b/Samples/5_Domain_Specific/MonteCarloMultiGPU/CMakeLists.txt index d6433daf..d37dae45 100644 --- a/Samples/5_Domain_Specific/MonteCarloMultiGPU/CMakeLists.txt +++ b/Samples/5_Domain_Specific/MonteCarloMultiGPU/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/5_Domain_Specific/NV12toBGRandResize/CMakeLists.txt b/Samples/5_Domain_Specific/NV12toBGRandResize/CMakeLists.txt index e607a7ab..7b03188f 100644 --- a/Samples/5_Domain_Specific/NV12toBGRandResize/CMakeLists.txt +++ b/Samples/5_Domain_Specific/NV12toBGRandResize/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/5_Domain_Specific/SobelFilter/CMakeLists.txt b/Samples/5_Domain_Specific/SobelFilter/CMakeLists.txt index 3bd61f31..7f517960 100644 --- a/Samples/5_Domain_Specific/SobelFilter/CMakeLists.txt +++ b/Samples/5_Domain_Specific/SobelFilter/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/5_Domain_Specific/SobolQRNG/CMakeLists.txt b/Samples/5_Domain_Specific/SobolQRNG/CMakeLists.txt index 6202987d..7e6dfb0f 100644 --- a/Samples/5_Domain_Specific/SobolQRNG/CMakeLists.txt +++ b/Samples/5_Domain_Specific/SobolQRNG/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/5_Domain_Specific/bicubicTexture/CMakeLists.txt b/Samples/5_Domain_Specific/bicubicTexture/CMakeLists.txt index c63b20cb..d812d607 100644 --- a/Samples/5_Domain_Specific/bicubicTexture/CMakeLists.txt +++ b/Samples/5_Domain_Specific/bicubicTexture/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/5_Domain_Specific/bilateralFilter/CMakeLists.txt b/Samples/5_Domain_Specific/bilateralFilter/CMakeLists.txt index ac68dd45..23263e0b 100644 --- a/Samples/5_Domain_Specific/bilateralFilter/CMakeLists.txt +++ b/Samples/5_Domain_Specific/bilateralFilter/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/5_Domain_Specific/binomialOptions/CMakeLists.txt b/Samples/5_Domain_Specific/binomialOptions/CMakeLists.txt index 2e12e1bc..f867034d 100644 --- a/Samples/5_Domain_Specific/binomialOptions/CMakeLists.txt +++ b/Samples/5_Domain_Specific/binomialOptions/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/5_Domain_Specific/binomialOptions_nvrtc/CMakeLists.txt b/Samples/5_Domain_Specific/binomialOptions_nvrtc/CMakeLists.txt index 19914ad2..3e274d2a 100644 --- a/Samples/5_Domain_Specific/binomialOptions_nvrtc/CMakeLists.txt +++ b/Samples/5_Domain_Specific/binomialOptions_nvrtc/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/5_Domain_Specific/convolutionFFT2D/CMakeLists.txt b/Samples/5_Domain_Specific/convolutionFFT2D/CMakeLists.txt index 74bebaf6..8a2b9d91 100644 --- a/Samples/5_Domain_Specific/convolutionFFT2D/CMakeLists.txt +++ b/Samples/5_Domain_Specific/convolutionFFT2D/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/5_Domain_Specific/dwtHaar1D/CMakeLists.txt b/Samples/5_Domain_Specific/dwtHaar1D/CMakeLists.txt index 2a951f23..30715419 100644 --- a/Samples/5_Domain_Specific/dwtHaar1D/CMakeLists.txt +++ b/Samples/5_Domain_Specific/dwtHaar1D/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/5_Domain_Specific/dxtc/CMakeLists.txt b/Samples/5_Domain_Specific/dxtc/CMakeLists.txt index 471a75cf..48c1cdb2 100644 --- a/Samples/5_Domain_Specific/dxtc/CMakeLists.txt +++ b/Samples/5_Domain_Specific/dxtc/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/5_Domain_Specific/fastWalshTransform/CMakeLists.txt b/Samples/5_Domain_Specific/fastWalshTransform/CMakeLists.txt index dd5d39f8..eed64581 100644 --- a/Samples/5_Domain_Specific/fastWalshTransform/CMakeLists.txt +++ b/Samples/5_Domain_Specific/fastWalshTransform/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/5_Domain_Specific/fluidsGL/CMakeLists.txt b/Samples/5_Domain_Specific/fluidsGL/CMakeLists.txt index 8ba01f3f..50188db1 100644 --- a/Samples/5_Domain_Specific/fluidsGL/CMakeLists.txt +++ b/Samples/5_Domain_Specific/fluidsGL/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/5_Domain_Specific/marchingCubes/CMakeLists.txt b/Samples/5_Domain_Specific/marchingCubes/CMakeLists.txt index a6fe9e94..1e62b5a0 100644 --- a/Samples/5_Domain_Specific/marchingCubes/CMakeLists.txt +++ b/Samples/5_Domain_Specific/marchingCubes/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/5_Domain_Specific/nbody/CMakeLists.txt b/Samples/5_Domain_Specific/nbody/CMakeLists.txt index be99daf3..9c139026 100644 --- a/Samples/5_Domain_Specific/nbody/CMakeLists.txt +++ b/Samples/5_Domain_Specific/nbody/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/CMakeLists.txt b/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/CMakeLists.txt index cd485714..06c2b29f 100644 --- a/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/CMakeLists.txt +++ b/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/5_Domain_Specific/postProcessGL/CMakeLists.txt b/Samples/5_Domain_Specific/postProcessGL/CMakeLists.txt index dd272bbc..d5584473 100644 --- a/Samples/5_Domain_Specific/postProcessGL/CMakeLists.txt +++ b/Samples/5_Domain_Specific/postProcessGL/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/5_Domain_Specific/quasirandomGenerator/CMakeLists.txt b/Samples/5_Domain_Specific/quasirandomGenerator/CMakeLists.txt index 40ee2936..4bda1257 100644 --- a/Samples/5_Domain_Specific/quasirandomGenerator/CMakeLists.txt +++ b/Samples/5_Domain_Specific/quasirandomGenerator/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/CMakeLists.txt b/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/CMakeLists.txt index 33539c81..0b58e97d 100644 --- a/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/CMakeLists.txt +++ b/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/5_Domain_Specific/recursiveGaussian/CMakeLists.txt b/Samples/5_Domain_Specific/recursiveGaussian/CMakeLists.txt index 1ee9675d..c8d8a9c9 100644 --- a/Samples/5_Domain_Specific/recursiveGaussian/CMakeLists.txt +++ b/Samples/5_Domain_Specific/recursiveGaussian/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/5_Domain_Specific/simpleD3D11/CMakeLists.txt b/Samples/5_Domain_Specific/simpleD3D11/CMakeLists.txt index 7222dead..18092d84 100644 --- a/Samples/5_Domain_Specific/simpleD3D11/CMakeLists.txt +++ b/Samples/5_Domain_Specific/simpleD3D11/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/5_Domain_Specific/simpleD3D11Texture/CMakeLists.txt b/Samples/5_Domain_Specific/simpleD3D11Texture/CMakeLists.txt index 1f6f5cc4..25638bf1 100644 --- a/Samples/5_Domain_Specific/simpleD3D11Texture/CMakeLists.txt +++ b/Samples/5_Domain_Specific/simpleD3D11Texture/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/5_Domain_Specific/simpleD3D12/CMakeLists.txt b/Samples/5_Domain_Specific/simpleD3D12/CMakeLists.txt index 82b70fae..401a243e 100644 --- a/Samples/5_Domain_Specific/simpleD3D12/CMakeLists.txt +++ b/Samples/5_Domain_Specific/simpleD3D12/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/5_Domain_Specific/simpleGL/CMakeLists.txt b/Samples/5_Domain_Specific/simpleGL/CMakeLists.txt index ace2cce3..fd190588 100644 --- a/Samples/5_Domain_Specific/simpleGL/CMakeLists.txt +++ b/Samples/5_Domain_Specific/simpleGL/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/5_Domain_Specific/simpleVulkan/CMakeLists.txt b/Samples/5_Domain_Specific/simpleVulkan/CMakeLists.txt index e6bd2f30..c368d578 100644 --- a/Samples/5_Domain_Specific/simpleVulkan/CMakeLists.txt +++ b/Samples/5_Domain_Specific/simpleVulkan/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/5_Domain_Specific/simpleVulkan/SineWaveSimulation.cu b/Samples/5_Domain_Specific/simpleVulkan/SineWaveSimulation.cu index 7c6a7737..032bf2f5 100644 --- a/Samples/5_Domain_Specific/simpleVulkan/SineWaveSimulation.cu +++ b/Samples/5_Domain_Specific/simpleVulkan/SineWaveSimulation.cu @@ -94,8 +94,10 @@ int SineWaveSimulation::initCuda(uint8_t *vkDeviceUUID, size_t UUID_SIZE) // Find the GPU which is selected by Vulkan while (current_device < device_count) { cudaGetDeviceProperties(&deviceProp, current_device); + int computeMode; + checkCudaErrors(cudaDeviceGetAttribute(&computeMode, cudaDevAttrComputeMode, current_device)); - if ((deviceProp.computeMode != cudaComputeModeProhibited)) { + if ((computeMode != cudaComputeModeProhibited)) { // Compare the cuda device UUID with vulkan UUID int ret = memcmp((void *)&deviceProp.uuid, vkDeviceUUID, UUID_SIZE); if (ret == 0) { diff --git a/Samples/5_Domain_Specific/simpleVulkanMMAP/CMakeLists.txt b/Samples/5_Domain_Specific/simpleVulkanMMAP/CMakeLists.txt index 48efbd27..e3902483 100644 --- a/Samples/5_Domain_Specific/simpleVulkanMMAP/CMakeLists.txt +++ b/Samples/5_Domain_Specific/simpleVulkanMMAP/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/5_Domain_Specific/smokeParticles/CMakeLists.txt b/Samples/5_Domain_Specific/smokeParticles/CMakeLists.txt index 7f31868d..c6c8f63f 100644 --- a/Samples/5_Domain_Specific/smokeParticles/CMakeLists.txt +++ b/Samples/5_Domain_Specific/smokeParticles/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/5_Domain_Specific/stereoDisparity/CMakeLists.txt b/Samples/5_Domain_Specific/stereoDisparity/CMakeLists.txt index dbd41779..4f7e79b3 100644 --- a/Samples/5_Domain_Specific/stereoDisparity/CMakeLists.txt +++ b/Samples/5_Domain_Specific/stereoDisparity/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/5_Domain_Specific/volumeFiltering/CMakeLists.txt b/Samples/5_Domain_Specific/volumeFiltering/CMakeLists.txt index 6b225790..d91af981 100644 --- a/Samples/5_Domain_Specific/volumeFiltering/CMakeLists.txt +++ b/Samples/5_Domain_Specific/volumeFiltering/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/5_Domain_Specific/volumeRender/CMakeLists.txt b/Samples/5_Domain_Specific/volumeRender/CMakeLists.txt index 342584cb..1706b3eb 100644 --- a/Samples/5_Domain_Specific/volumeRender/CMakeLists.txt +++ b/Samples/5_Domain_Specific/volumeRender/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/5_Domain_Specific/vulkanImageCUDA/CMakeLists.txt b/Samples/5_Domain_Specific/vulkanImageCUDA/CMakeLists.txt index 93c67767..605657ce 100644 --- a/Samples/5_Domain_Specific/vulkanImageCUDA/CMakeLists.txt +++ b/Samples/5_Domain_Specific/vulkanImageCUDA/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/5_Domain_Specific/vulkanImageCUDA/vulkanImageCUDA.cu b/Samples/5_Domain_Specific/vulkanImageCUDA/vulkanImageCUDA.cu index f782bbb9..702d6ee3 100644 --- a/Samples/5_Domain_Specific/vulkanImageCUDA/vulkanImageCUDA.cu +++ b/Samples/5_Domain_Specific/vulkanImageCUDA/vulkanImageCUDA.cu @@ -830,6 +830,7 @@ private: int devices_prohibited = 0; cudaDeviceProp deviceProp; + int computeMode; checkCudaErrors(cudaGetDeviceCount(&device_count)); if (device_count == 0) { @@ -840,8 +841,8 @@ private: // Find the GPU which is selected by Vulkan while (current_device < device_count) { cudaGetDeviceProperties(&deviceProp, current_device); - - if ((deviceProp.computeMode != cudaComputeModeProhibited)) { + checkCudaErrors(cudaDeviceGetAttribute(&computeMode, cudaDevAttrComputeMode, current_device)); + if ((computeMode != cudaComputeModeProhibited)) { // Compare the cuda device UUID with vulkan UUID int ret = memcmp(&deviceProp.uuid, &vkDeviceUUID, VK_UUID_SIZE); if (ret == 0) { diff --git a/Samples/6_Performance/LargeKernelParameter/CMakeLists.txt b/Samples/6_Performance/LargeKernelParameter/CMakeLists.txt index 08c5dd61..f28d3994 100644 --- a/Samples/6_Performance/LargeKernelParameter/CMakeLists.txt +++ b/Samples/6_Performance/LargeKernelParameter/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) diff --git a/Samples/6_Performance/UnifiedMemoryPerf/CMakeLists.txt b/Samples/6_Performance/UnifiedMemoryPerf/CMakeLists.txt index 1cea77f3..c62b33f9 100644 --- a/Samples/6_Performance/UnifiedMemoryPerf/CMakeLists.txt +++ b/Samples/6_Performance/UnifiedMemoryPerf/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/6_Performance/UnifiedMemoryPerf/matrixMultiplyPerf.cu b/Samples/6_Performance/UnifiedMemoryPerf/matrixMultiplyPerf.cu index b260fbcc..59533cfd 100644 --- a/Samples/6_Performance/UnifiedMemoryPerf/matrixMultiplyPerf.cu +++ b/Samples/6_Performance/UnifiedMemoryPerf/matrixMultiplyPerf.cu @@ -335,9 +335,11 @@ void runMatrixMultiplyKernel(unsigned int matrixDim, checkCudaErrors(cudaMallocManaged(&dptrA, size)); checkCudaErrors(cudaMallocManaged(&dptrB, size)); checkCudaErrors(cudaMallocManaged(&dptrC, size)); - checkCudaErrors(cudaMemPrefetchAsync(dptrA, size, cudaCpuDeviceId)); - checkCudaErrors(cudaMemPrefetchAsync(dptrB, size, cudaCpuDeviceId)); - checkCudaErrors(cudaMemPrefetchAsync(dptrC, size, cudaCpuDeviceId)); + cudaMemLocation hostLoc; + hostLoc.type = cudaMemLocationTypeHost; + checkCudaErrors(cudaMemPrefetchAsync(dptrA, size, hostLoc, 0)); + checkCudaErrors(cudaMemPrefetchAsync(dptrB, size, hostLoc, 0)); + checkCudaErrors(cudaMemPrefetchAsync(dptrC, size, hostLoc, 0)); } else { checkCudaErrors(cudaMallocManaged(&dptrA, size, cudaMemAttachHost)); @@ -402,9 +404,12 @@ void runMatrixMultiplyKernel(unsigned int matrixDim, } if (hintsRequired) { if (deviceProp.concurrentManagedAccess) { - checkCudaErrors(cudaMemPrefetchAsync(dptrA, size, device_id, streamToRunOn)); - checkCudaErrors(cudaMemPrefetchAsync(dptrB, size, device_id, streamToRunOn)); - checkCudaErrors(cudaMemPrefetchAsync(dptrC, size, device_id, streamToRunOn)); + cudaMemLocation deviceLoc; + deviceLoc.type = cudaMemLocationTypeDevice; + deviceLoc.id = device_id; + checkCudaErrors(cudaMemPrefetchAsync(dptrA, size, deviceLoc, 0, streamToRunOn)); + checkCudaErrors(cudaMemPrefetchAsync(dptrB, size, deviceLoc, 0, streamToRunOn)); + checkCudaErrors(cudaMemPrefetchAsync(dptrC, size, deviceLoc, 0, streamToRunOn)); } else { checkCudaErrors(cudaStreamAttachMemAsync(streamToRunOn, dptrA, 0, cudaMemAttachGlobal)); @@ -437,9 +442,11 @@ void runMatrixMultiplyKernel(unsigned int matrixDim, sdkStartTimer(&gpuTransferCallsTimer); if (hintsRequired) { if (deviceProp.concurrentManagedAccess) { - checkCudaErrors(cudaMemPrefetchAsync(dptrA, size, cudaCpuDeviceId)); - checkCudaErrors(cudaMemPrefetchAsync(dptrB, size, cudaCpuDeviceId)); - checkCudaErrors(cudaMemPrefetchAsync(dptrC, size, cudaCpuDeviceId)); + cudaMemLocation hostLoc; + hostLoc.type = cudaMemLocationTypeHost; + checkCudaErrors(cudaMemPrefetchAsync(dptrA, size, hostLoc, 0)); + checkCudaErrors(cudaMemPrefetchAsync(dptrB, size, hostLoc, 0)); + checkCudaErrors(cudaMemPrefetchAsync(dptrC, size, hostLoc, 0)); } else { checkCudaErrors(cudaStreamAttachMemAsync(streamToRunOn, dptrA, 0, cudaMemAttachHost)); diff --git a/Samples/6_Performance/alignedTypes/CMakeLists.txt b/Samples/6_Performance/alignedTypes/CMakeLists.txt index ea89b73c..9dbf7ced 100644 --- a/Samples/6_Performance/alignedTypes/CMakeLists.txt +++ b/Samples/6_Performance/alignedTypes/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/6_Performance/cudaGraphsPerfScaling/CMakeLists.txt b/Samples/6_Performance/cudaGraphsPerfScaling/CMakeLists.txt index f56e0401..3c088a22 100644 --- a/Samples/6_Performance/cudaGraphsPerfScaling/CMakeLists.txt +++ b/Samples/6_Performance/cudaGraphsPerfScaling/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/6_Performance/transpose/CMakeLists.txt b/Samples/6_Performance/transpose/CMakeLists.txt index 0271319d..f066e8ba 100644 --- a/Samples/6_Performance/transpose/CMakeLists.txt +++ b/Samples/6_Performance/transpose/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120) +set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 101 120) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/7_libNVVM/CMakeLists.txt b/Samples/7_libNVVM/CMakeLists.txt index e29f349b..08310559 100644 --- a/Samples/7_libNVVM/CMakeLists.txt +++ b/Samples/7_libNVVM/CMakeLists.txt @@ -93,7 +93,7 @@ message(STATUS "Using rpath: ${CMAKE_INSTALL_RPATH}") # On Windows, locate the nvvm.dll so we can install it. if (WIN32) - find_file(NVVM_DLL nvvm64_40_0.dll PATHS "${LIBNVVM_HOME}/bin") + find_file(NVVM_DLL nvvm64_40_0.dll PATHS "${LIBNVVM_HOME}/bin" "${LIBNVVM_HOME}/bin/x64/") if (NOT NVVM_DLL) message(FATAL_ERROR "Found nvvm .h/.lib, but not .dll") endif() diff --git a/Samples/7_libNVVM/cuda-c-linking/CMakeLists.txt b/Samples/7_libNVVM/cuda-c-linking/CMakeLists.txt index ec70a1d4..a113a5d9 100644 --- a/Samples/7_libNVVM/cuda-c-linking/CMakeLists.txt +++ b/Samples/7_libNVVM/cuda-c-linking/CMakeLists.txt @@ -57,12 +57,12 @@ endif() ############################## ### The math library can be built to support a number of GPU platforms using the -### common compute_50 architecture. +### common compute_75 architecture. ### For a collection of specific architecutres, the compiler options ### "-gencode=compute_XX,code=sm_XX"... can be used multiple times in the same ### nvcc invocation. ### The result is bundled into a system library using the 'nvcc -lib' feature. -### >> nvcc -m64 -arch=compute_50 -dc math-funcs.cu -o math-funcs64.o +### >> nvcc -m64 -arch=compute_75 -dc math-funcs.cu -o math-funcs64.o ### >> nvcc -m64 -lib math-funcs64.o -o libmathfuncs64.a enable_language(CUDA) diff --git a/Samples/7_libNVVM/device-side-launch/dsl.c b/Samples/7_libNVVM/device-side-launch/dsl.c index cc493743..17cfe835 100644 --- a/Samples/7_libNVVM/device-side-launch/dsl.c +++ b/Samples/7_libNVVM/device-side-launch/dsl.c @@ -195,7 +195,7 @@ static CUresult buildKernel(CUcontext *phContext, CUdevice *phDevice, CUmodule * // Initialize CUDA and obtain the device's compute capability. int major = 0, minor = 0; *phDevice = cudaDeviceInit(&major, &minor); - checkCudaErrors(cuCtxCreate(phContext, 0, *phDevice)); + checkCudaErrors(cuCtxCreate(phContext, NULL, 0, *phDevice)); // Get the NVVM IR from file. size_t size = 0; diff --git a/Samples/7_libNVVM/simple/simple.c b/Samples/7_libNVVM/simple/simple.c index 94035e30..58c31d5b 100644 --- a/Samples/7_libNVVM/simple/simple.c +++ b/Samples/7_libNVVM/simple/simple.c @@ -89,7 +89,7 @@ initCUDA(CUcontext *phContext, CUdevice *phDevice, CUmodule *phModule, CUfunctio assert(phContext && phDevice && phModule && phKernel && ptx); // Create a CUDA context on the device. - checkCudaErrors(cuCtxCreate(phContext, 0, *phDevice)); + checkCudaErrors(cuCtxCreate(phContext, NULL, 0, *phDevice)); // Load the PTX. checkCudaErrors(cuModuleLoadDataEx(phModule, ptx, 0, 0, 0)); diff --git a/Samples/7_libNVVM/uvmlite/uvmlite.c b/Samples/7_libNVVM/uvmlite/uvmlite.c index 9cfead84..f977e092 100644 --- a/Samples/7_libNVVM/uvmlite/uvmlite.c +++ b/Samples/7_libNVVM/uvmlite/uvmlite.c @@ -206,7 +206,7 @@ static CUresult buildKernel(CUcontext *phContext, CUdevice *phDevice, CUmodule * *phDevice = cudaDeviceInit(&major, &minor); // Create a context on the device. - checkCudaErrors(cuCtxCreate(phContext, 0, *phDevice)); + checkCudaErrors(cuCtxCreate(phContext, NULL, 0, *phDevice)); // Get the NVVM IR from file. size_t size = 0; diff --git a/Samples/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/CMakeLists.txt b/Samples/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/CMakeLists.txt index 85f72792..41d93645 100644 --- a/Samples/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/CMakeLists.txt +++ b/Samples/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 72 87 101) +set(CMAKE_CUDA_ARCHITECTURES 87 101) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/8_Platform_Specific/Tegra/cuDLAErrorReporting/CMakeLists.txt b/Samples/8_Platform_Specific/Tegra/cuDLAErrorReporting/CMakeLists.txt index 42e2f1aa..73ceee70 100644 --- a/Samples/8_Platform_Specific/Tegra/cuDLAErrorReporting/CMakeLists.txt +++ b/Samples/8_Platform_Specific/Tegra/cuDLAErrorReporting/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 72 87 101) +set(CMAKE_CUDA_ARCHITECTURES 87 101) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) diff --git a/Samples/8_Platform_Specific/Tegra/cuDLAHybridMode/CMakeLists.txt b/Samples/8_Platform_Specific/Tegra/cuDLAHybridMode/CMakeLists.txt index 0a4e81c0..a94ea1c5 100644 --- a/Samples/8_Platform_Specific/Tegra/cuDLAHybridMode/CMakeLists.txt +++ b/Samples/8_Platform_Specific/Tegra/cuDLAHybridMode/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 72 87 101) +set(CMAKE_CUDA_ARCHITECTURES 87 101) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) diff --git a/Samples/8_Platform_Specific/Tegra/cuDLALayerwiseStatsHybrid/CMakeLists.txt b/Samples/8_Platform_Specific/Tegra/cuDLALayerwiseStatsHybrid/CMakeLists.txt index 94dc0cb5..545f9e88 100644 --- a/Samples/8_Platform_Specific/Tegra/cuDLALayerwiseStatsHybrid/CMakeLists.txt +++ b/Samples/8_Platform_Specific/Tegra/cuDLALayerwiseStatsHybrid/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 72 87 101) +set(CMAKE_CUDA_ARCHITECTURES 87 101) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) diff --git a/Samples/8_Platform_Specific/Tegra/cuDLALayerwiseStatsStandalone/CMakeLists.txt b/Samples/8_Platform_Specific/Tegra/cuDLALayerwiseStatsStandalone/CMakeLists.txt index 2b82a3aa..b963d524 100644 --- a/Samples/8_Platform_Specific/Tegra/cuDLALayerwiseStatsStandalone/CMakeLists.txt +++ b/Samples/8_Platform_Specific/Tegra/cuDLALayerwiseStatsStandalone/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 72 87 101) +set(CMAKE_CUDA_ARCHITECTURES 87 101) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) diff --git a/Samples/8_Platform_Specific/Tegra/cuDLAStandaloneMode/CMakeLists.txt b/Samples/8_Platform_Specific/Tegra/cuDLAStandaloneMode/CMakeLists.txt index 9558a7c5..3889eb14 100644 --- a/Samples/8_Platform_Specific/Tegra/cuDLAStandaloneMode/CMakeLists.txt +++ b/Samples/8_Platform_Specific/Tegra/cuDLAStandaloneMode/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 72 87 101) +set(CMAKE_CUDA_ARCHITECTURES 87 101) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) diff --git a/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/CMakeLists.txt b/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/CMakeLists.txt index af982db2..0db51635 100644 --- a/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/CMakeLists.txt +++ b/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 72 87 101) +set(CMAKE_CUDA_ARCHITECTURES 87 101) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) diff --git a/Samples/8_Platform_Specific/Tegra/cudaNvSciNvMedia/CMakeLists.txt b/Samples/8_Platform_Specific/Tegra/cudaNvSciNvMedia/CMakeLists.txt index f2f4d749..972c25f6 100644 --- a/Samples/8_Platform_Specific/Tegra/cudaNvSciNvMedia/CMakeLists.txt +++ b/Samples/8_Platform_Specific/Tegra/cudaNvSciNvMedia/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 72 87 101) +set(CMAKE_CUDA_ARCHITECTURES 87 101) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) diff --git a/Samples/8_Platform_Specific/Tegra/fluidsGLES/CMakeLists.txt b/Samples/8_Platform_Specific/Tegra/fluidsGLES/CMakeLists.txt index 6cd6a5fb..72c33ce7 100644 --- a/Samples/8_Platform_Specific/Tegra/fluidsGLES/CMakeLists.txt +++ b/Samples/8_Platform_Specific/Tegra/fluidsGLES/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 72 87 101) +set(CMAKE_CUDA_ARCHITECTURES 87 101) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) diff --git a/Samples/8_Platform_Specific/Tegra/nbody_opengles/CMakeLists.txt b/Samples/8_Platform_Specific/Tegra/nbody_opengles/CMakeLists.txt index 43484fbe..d1f60472 100644 --- a/Samples/8_Platform_Specific/Tegra/nbody_opengles/CMakeLists.txt +++ b/Samples/8_Platform_Specific/Tegra/nbody_opengles/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 72 87 101) +set(CMAKE_CUDA_ARCHITECTURES 87 101) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) diff --git a/Samples/8_Platform_Specific/Tegra/simpleGLES/CMakeLists.txt b/Samples/8_Platform_Specific/Tegra/simpleGLES/CMakeLists.txt index 52dc2f7f..049aecdd 100644 --- a/Samples/8_Platform_Specific/Tegra/simpleGLES/CMakeLists.txt +++ b/Samples/8_Platform_Specific/Tegra/simpleGLES/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 72 87 101) +set(CMAKE_CUDA_ARCHITECTURES 87 101) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) diff --git a/Samples/8_Platform_Specific/Tegra/simpleGLES_EGLOutput/CMakeLists.txt b/Samples/8_Platform_Specific/Tegra/simpleGLES_EGLOutput/CMakeLists.txt index bc0b2072..1acd0e39 100644 --- a/Samples/8_Platform_Specific/Tegra/simpleGLES_EGLOutput/CMakeLists.txt +++ b/Samples/8_Platform_Specific/Tegra/simpleGLES_EGLOutput/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -set(CMAKE_CUDA_ARCHITECTURES 72 87 101) +set(CMAKE_CUDA_ARCHITECTURES 87 101) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG)