diff --git a/Common/helper_cuda.h b/Common/helper_cuda.h index dc0efc46..401c41b2 100644 --- a/Common/helper_cuda.h +++ b/Common/helper_cuda.h @@ -138,26 +138,26 @@ static const char *_cudaGetErrorEnum(cufftResult error) { case CUFFT_UNALIGNED_DATA: return "CUFFT_UNALIGNED_DATA"; - case CUFFT_INCOMPLETE_PARAMETER_LIST: - return "CUFFT_INCOMPLETE_PARAMETER_LIST"; - case CUFFT_INVALID_DEVICE: return "CUFFT_INVALID_DEVICE"; - case CUFFT_PARSE_ERROR: - return "CUFFT_PARSE_ERROR"; - case CUFFT_NO_WORKSPACE: return "CUFFT_NO_WORKSPACE"; case CUFFT_NOT_IMPLEMENTED: return "CUFFT_NOT_IMPLEMENTED"; - case CUFFT_LICENSE_ERROR: - return "CUFFT_LICENSE_ERROR"; + case CUFFT_MISSING_DEPENDENCY: + return "CUFFT_MISSING_DEPENDENCY"; - case CUFFT_NOT_SUPPORTED: - return "CUFFT_NOT_SUPPORTED"; + case CUFFT_NVRTC_FAILURE: + return "CUFFT_NVRTC_FAILURE"; + + case CUFFT_NVJITLINK_FAILURE: + return "CUFFT_NVJITLINK_FAILURE"; + + case CUFFT_NVSHMEM_FAILURE: + return "CUFFT_NVSHMEM_FAILURE"; } return ""; diff --git a/Samples/0_Introduction/matrixMul_nvrtc/CMakeLists.txt b/Samples/0_Introduction/matrixMul_nvrtc/CMakeLists.txt index 2d038774..1e7f6618 100644 --- a/Samples/0_Introduction/matrixMul_nvrtc/CMakeLists.txt +++ b/Samples/0_Introduction/matrixMul_nvrtc/CMakeLists.txt @@ -45,10 +45,10 @@ add_custom_command(TARGET matrixMul_nvrtc POST_BUILD add_custom_command(TARGET matrixMul_nvrtc POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_directory - ${CUDAToolkit_BIN_DIR}/../include/nv ${CMAKE_CURRENT_BINARY_DIR}/nv + ${CUDAToolkit_BIN_DIR}/../include/cccl/nv ${CMAKE_CURRENT_BINARY_DIR}/nv ) add_custom_command(TARGET matrixMul_nvrtc POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_directory - ${CUDAToolkit_BIN_DIR}/../include/cuda ${CMAKE_CURRENT_BINARY_DIR}/cuda + ${CUDAToolkit_BIN_DIR}/../include/cccl/cuda ${CMAKE_CURRENT_BINARY_DIR}/cuda ) diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/CMakeLists.txt b/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/CMakeLists.txt index 
589059d7..924e640f 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/CMakeLists.txt +++ b/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/CMakeLists.txt @@ -13,6 +13,7 @@ set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") if(ENABLE_CUDA_DEBUG) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -maxrregcount=128") # limit register usage to 128 per thread to comply with the maximum number of 32-bit registers per SM else() set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -lineinfo") # add line information to all builds for debug tools (exclusive to -G option) endif() diff --git a/Samples/5_Domain_Specific/simpleVulkan/VulkanBaseApp.h b/Samples/5_Domain_Specific/simpleVulkan/VulkanBaseApp.h index efb5fac4..7354881d 100644 --- a/Samples/5_Domain_Specific/simpleVulkan/VulkanBaseApp.h +++ b/Samples/5_Domain_Specific/simpleVulkan/VulkanBaseApp.h @@ -34,8 +34,10 @@ #include #ifdef _WIN64 #define NOMINMAX -#include +// Include windows.h first #include +// Include vulkan_win32.h after windows.h +#include #endif /* _WIN64 */ /* remove _VK_TIMELINE_SEMAPHORE to use binary semaphores */ diff --git a/Samples/5_Domain_Specific/simpleVulkanMMAP/VulkanBaseApp.h b/Samples/5_Domain_Specific/simpleVulkanMMAP/VulkanBaseApp.h index 814e321e..4f4425bd 100644 --- a/Samples/5_Domain_Specific/simpleVulkanMMAP/VulkanBaseApp.h +++ b/Samples/5_Domain_Specific/simpleVulkanMMAP/VulkanBaseApp.h @@ -34,8 +34,10 @@ #include #ifdef _WIN64 #define NOMINMAX -#include +// Include windows.h first, because the other Windows headers depend on it #include +// Include the other Windows headers +#include #endif /* _WIN64 */ struct GLFWwindow; diff --git a/Samples/5_Domain_Specific/vulkanImageCUDA/vulkanImageCUDA.cu b/Samples/5_Domain_Specific/vulkanImageCUDA/vulkanImageCUDA.cu index 702d6ee3..f991da41 100644 ---
a/Samples/5_Domain_Specific/vulkanImageCUDA/vulkanImageCUDA.cu +++ b/Samples/5_Domain_Specific/vulkanImageCUDA/vulkanImageCUDA.cu @@ -27,10 +27,12 @@ #define GLFW_INCLUDE_VULKAN #ifdef _WIN64 +// Include windows.h first, because the other Windows headers depend on it +#include +// Include the other Windows headers #include #include #include -#include #define _USE_MATH_DEFINES #endif diff --git a/Samples/6_Performance/transpose/transpose.cu b/Samples/6_Performance/transpose/transpose.cu index 8a1d8bec..09c7582e 100644 --- a/Samples/6_Performance/transpose/transpose.cu +++ b/Samples/6_Performance/transpose/transpose.cu @@ -597,17 +597,16 @@ int main(int argc, char **argv) 1, TILE_DIM * BLOCK_ROWS); - // Reset d_odata to zero before starting the next loop iteration to avoid - // carrying over results from previous kernels. Without this reset, residual - // data from a prior kernel (e.g., 'copy') could make a subsequent - // kernel (e.g., 'copySharedMem') appear correct even if it performs no work, + // Reset d_odata to zero before starting the next loop iteration to avoid + // carrying over results from previous kernels. Without this reset, residual + // data from a prior kernel (e.g., 'copy') could make a subsequent + // kernel (e.g., 'copySharedMem') appear correct even if it performs no work, // leading to false positives in compareData.
for (int i = 0; i < (size_x * size_y); ++i) { h_odata[i] = 0; } // copy host data to device - checkCudaErrors( - cudaMemcpy(d_odata, h_odata, mem_size, cudaMemcpyHostToDevice)); + checkCudaErrors(cudaMemcpy(d_odata, h_odata, mem_size, cudaMemcpyHostToDevice)); } // cleanup diff --git a/Samples/8_Platform_Specific/Tegra/CMakeLists.txt b/Samples/8_Platform_Specific/Tegra/CMakeLists.txt index c1a65040..356c2a13 100644 --- a/Samples/8_Platform_Specific/Tegra/CMakeLists.txt +++ b/Samples/8_Platform_Specific/Tegra/CMakeLists.txt @@ -5,7 +5,7 @@ add_subdirectory(cuDLAHybridMode) add_subdirectory(cuDLALayerwiseStatsHybrid) add_subdirectory(cuDLALayerwiseStatsStandalone) add_subdirectory(cuDLAStandaloneMode) -#add_subdirectory(EGLSync_CUDAEvent_Interop) +add_subdirectory(EGLSync_CUDAEvent_Interop) add_subdirectory(fluidsGLES) add_subdirectory(nbody_opengles) add_subdirectory(simpleGLES) diff --git a/Samples/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/CMakeLists.txt b/Samples/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/CMakeLists.txt index 90ad2f3b..b75b8356 100644 --- a/Samples/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/CMakeLists.txt +++ b/Samples/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/CMakeLists.txt @@ -17,7 +17,7 @@ else() endif() # Include directories and libraries -include_directories(../../../Common) +include_directories(../../../../Common) find_package(EGL) find_package(X11) diff --git a/Samples/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/EGLSync_CUDAEvent_Interop.cu b/Samples/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/EGLSync_CUDAEvent_Interop.cu index 1c459c5c..9e709a16 100644 --- a/Samples/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/EGLSync_CUDAEvent_Interop.cu +++ b/Samples/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/EGLSync_CUDAEvent_Interop.cu @@ -259,7 +259,8 @@ void checkSync(int argc, char **argv) unsigned char *pSurf_read = NULL, *pSurf_write = NULL; int integrated; - CUresult status = 
CUDA_SUCCESS; + CUresult status = CUDA_SUCCESS; + CUctxCreateParams ctxCreateParams = {}; // Init values for variables x = y = 0; @@ -269,7 +270,7 @@ void checkSync(int argc, char **argv) } device = findCudaDeviceDRV(argc, (const char **)argv); - if (CUDA_SUCCESS != (status = cuCtxCreate(&context, 0, device))) { + if (CUDA_SUCCESS != (status = cuCtxCreate(&context, &ctxCreateParams, 0, device))) { printf("failed to create CUDA context\n"); } cuCtxPushCurrent(context);