Merge branch 'master' into cuda_a_dev

This commit is contained in:
Rob Armstrong 2025-05-21 09:29:30 -07:00
commit 5ce8b512ea
10 changed files with 32 additions and 25 deletions

View File

@ -138,26 +138,26 @@ static const char *_cudaGetErrorEnum(cufftResult error) {
case CUFFT_UNALIGNED_DATA: case CUFFT_UNALIGNED_DATA:
return "CUFFT_UNALIGNED_DATA"; return "CUFFT_UNALIGNED_DATA";
case CUFFT_INCOMPLETE_PARAMETER_LIST:
return "CUFFT_INCOMPLETE_PARAMETER_LIST";
case CUFFT_INVALID_DEVICE: case CUFFT_INVALID_DEVICE:
return "CUFFT_INVALID_DEVICE"; return "CUFFT_INVALID_DEVICE";
case CUFFT_PARSE_ERROR:
return "CUFFT_PARSE_ERROR";
case CUFFT_NO_WORKSPACE: case CUFFT_NO_WORKSPACE:
return "CUFFT_NO_WORKSPACE"; return "CUFFT_NO_WORKSPACE";
case CUFFT_NOT_IMPLEMENTED: case CUFFT_NOT_IMPLEMENTED:
return "CUFFT_NOT_IMPLEMENTED"; return "CUFFT_NOT_IMPLEMENTED";
case CUFFT_LICENSE_ERROR: case CUFFT_MISSING_DEPENDENCY:
return "CUFFT_LICENSE_ERROR"; return "CUFFT_MISSING_DEPENDENCY";
case CUFFT_NOT_SUPPORTED: case CUFFT_NVRTC_FAILURE:
return "CUFFT_NOT_SUPPORTED"; return "CUFFT_NVRTC_FAILURE";
case CUFFT_NVJITLINK_FAILURE:
return "CUFFT_NVJITLINK_FAILURE";
case CUFFT_NVSHMEM_FAILURE:
return "CUFFT_NVSHMEM_FAILURE";
} }
return "<unknown>"; return "<unknown>";

View File

@ -45,10 +45,10 @@ add_custom_command(TARGET matrixMul_nvrtc POST_BUILD
add_custom_command(TARGET matrixMul_nvrtc POST_BUILD add_custom_command(TARGET matrixMul_nvrtc POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy_directory COMMAND ${CMAKE_COMMAND} -E copy_directory
${CUDAToolkit_BIN_DIR}/../include/nv ${CMAKE_CURRENT_BINARY_DIR}/nv ${CUDAToolkit_BIN_DIR}/../include/cccl/nv ${CMAKE_CURRENT_BINARY_DIR}/nv
) )
add_custom_command(TARGET matrixMul_nvrtc POST_BUILD add_custom_command(TARGET matrixMul_nvrtc POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy_directory COMMAND ${CMAKE_COMMAND} -E copy_directory
${CUDAToolkit_BIN_DIR}/../include/cuda ${CMAKE_CURRENT_BINARY_DIR}/cuda ${CUDAToolkit_BIN_DIR}/../include/cccl/cuda ${CMAKE_CURRENT_BINARY_DIR}/cuda
) )

View File

@ -13,6 +13,7 @@ set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
if(ENABLE_CUDA_DEBUG) if(ENABLE_CUDA_DEBUG)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -maxrregcount=128") # limit register usage to 128 per thread to comply with the maximum number of 32-bit registers per SM
else() else()
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -lineinfo") # add line information to all builds for debug tools (exclusive to -G option) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -lineinfo") # add line information to all builds for debug tools (exclusive to -G option)
endif() endif()

View File

@ -34,8 +34,10 @@
#include <vulkan/vulkan.h> #include <vulkan/vulkan.h>
#ifdef _WIN64 #ifdef _WIN64
#define NOMINMAX #define NOMINMAX
#include <vulkan/vulkan_win32.h> // Include windows.h first
#include <windows.h> #include <windows.h>
// Include vulkan_win32.h after windows.h
#include <vulkan/vulkan_win32.h>
#endif /* _WIN64 */ #endif /* _WIN64 */
/* remove _VK_TIMELINE_SEMAPHORE to use binary semaphores */ /* remove _VK_TIMELINE_SEMAPHORE to use binary semaphores */

View File

@ -34,8 +34,10 @@
#include <vulkan/vulkan.h> #include <vulkan/vulkan.h>
#ifdef _WIN64 #ifdef _WIN64
#define NOMINMAX #define NOMINMAX
#include <vulkan/vulkan_win32.h> // Include windows.h first, since the other Windows headers depend on it
#include <windows.h> #include <windows.h>
// Add other Windows headers
#include <vulkan/vulkan_win32.h>
#endif /* _WIN64 */ #endif /* _WIN64 */
struct GLFWwindow; struct GLFWwindow;

View File

@ -27,10 +27,12 @@
#define GLFW_INCLUDE_VULKAN #define GLFW_INCLUDE_VULKAN
#ifdef _WIN64 #ifdef _WIN64
// Include windows.h first, since the other Windows headers depend on it
#include <windows.h>
// Add other Windows headers
#include <VersionHelpers.h> #include <VersionHelpers.h>
#include <aclapi.h> #include <aclapi.h>
#include <dxgi1_2.h> #include <dxgi1_2.h>
#include <windows.h>
#define _USE_MATH_DEFINES #define _USE_MATH_DEFINES
#endif #endif

View File

@ -606,8 +606,7 @@ int main(int argc, char **argv)
h_odata[i] = 0; h_odata[i] = 0;
} }
// copy host data to device // copy host data to device
checkCudaErrors( checkCudaErrors(cudaMemcpy(d_odata, h_odata, mem_size, cudaMemcpyHostToDevice));
cudaMemcpy(d_odata, h_odata, mem_size, cudaMemcpyHostToDevice));
} }
// cleanup // cleanup

View File

@ -5,7 +5,7 @@ add_subdirectory(cuDLAHybridMode)
add_subdirectory(cuDLALayerwiseStatsHybrid) add_subdirectory(cuDLALayerwiseStatsHybrid)
add_subdirectory(cuDLALayerwiseStatsStandalone) add_subdirectory(cuDLALayerwiseStatsStandalone)
add_subdirectory(cuDLAStandaloneMode) add_subdirectory(cuDLAStandaloneMode)
#add_subdirectory(EGLSync_CUDAEvent_Interop) add_subdirectory(EGLSync_CUDAEvent_Interop)
add_subdirectory(fluidsGLES) add_subdirectory(fluidsGLES)
add_subdirectory(nbody_opengles) add_subdirectory(nbody_opengles)
add_subdirectory(simpleGLES) add_subdirectory(simpleGLES)

View File

@ -17,7 +17,7 @@ else()
endif() endif()
# Include directories and libraries # Include directories and libraries
include_directories(../../../Common) include_directories(../../../../Common)
find_package(EGL) find_package(EGL)
find_package(X11) find_package(X11)

View File

@ -259,7 +259,8 @@ void checkSync(int argc, char **argv)
unsigned char *pSurf_read = NULL, *pSurf_write = NULL; unsigned char *pSurf_read = NULL, *pSurf_write = NULL;
int integrated; int integrated;
CUresult status = CUDA_SUCCESS; CUresult status = CUDA_SUCCESS;
CUctxCreateParams ctxCreateParams = {};
// Init values for variables // Init values for variables
x = y = 0; x = y = 0;
@ -269,7 +270,7 @@ void checkSync(int argc, char **argv)
} }
device = findCudaDeviceDRV(argc, (const char **)argv); device = findCudaDeviceDRV(argc, (const char **)argv);
if (CUDA_SUCCESS != (status = cuCtxCreate(&context, 0, device))) { if (CUDA_SUCCESS != (status = cuCtxCreate(&context, &ctxCreateParams, 0, device))) {
printf("failed to create CUDA context\n"); printf("failed to create CUDA context\n");
} }
cuCtxPushCurrent(context); cuCtxPushCurrent(context);