From 770e433a9ec260fe659036a43a5d2673b39ce45b Mon Sep 17 00:00:00 2001 From: Peggy Tian Date: Mon, 12 May 2025 06:04:22 +0000 Subject: [PATCH] Bug 5056055: limit register usage to 128 per thread in debug mode to comply with the maximum number of 32-bit registers per SM --- .../conjugateGradientMultiBlockCG/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/CMakeLists.txt b/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/CMakeLists.txt index 589059d7..924e640f 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/CMakeLists.txt +++ b/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/CMakeLists.txt @@ -13,6 +13,7 @@ set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -maxrregcount=128") # debug (-G) builds only: limit register usage to 128 per thread to comply with the maximum number of 32-bit registers per SM else() set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -lineinfo") # add line information to all builds for debug tools (exclusive to -G option) endif()