diff --git a/CHANGELOG.md b/CHANGELOG.md index 19cc5391..cb70c6f2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -77,6 +77,7 @@ * `simpleCUFFT_2d_MGPU` * `simpleCUFFT_MGPU` * `simpleCUFFT_callback` +* Updated toolchain for cross-compilation for Tegra QNX platforms. ### CUDA 12.9 * Updated toolchain for cross-compilation for Tegra Linux platforms. diff --git a/Common/helper_multiprocess.cpp b/Common/helper_multiprocess.cpp index 61fe0855..1e2d9255 100644 --- a/Common/helper_multiprocess.cpp +++ b/Common/helper_multiprocess.cpp @@ -186,7 +186,7 @@ int ipcCreateSocket(ipcHandle *&handle, const char *name, } unlink(name); - bzero(&servaddr, sizeof(servaddr)); + memset(&servaddr, 0, sizeof(servaddr)); servaddr.sun_family = AF_UNIX; size_t len = strlen(name); @@ -220,12 +220,12 @@ int ipcOpenSocket(ipcHandle *&handle) { return -1; } - bzero(&cliaddr, sizeof(cliaddr)); + memset(&cliaddr, 0, sizeof(cliaddr)); cliaddr.sun_family = AF_UNIX; - char temp[10]; + char temp[20]; // Create unique name for the socket. 
- sprintf(temp, "%u", getpid()); + sprintf(temp, "%s%u", SOCK_FOLDER, getpid()); strcpy(cliaddr.sun_path, temp); if (bind(sock, (struct sockaddr *)&cliaddr, sizeof(cliaddr)) < 0) { @@ -326,7 +326,7 @@ int ipcSendDataToServer(ipcHandle *handle, const char *serverName, ssize_t sendResult; struct sockaddr_un serveraddr; - bzero(&serveraddr, sizeof(serveraddr)); + memset(&serveraddr, 0, sizeof(serveraddr)); serveraddr.sun_family = AF_UNIX; strncpy(serveraddr.sun_path, serverName, sizeof(serveraddr.sun_path) - 1); @@ -359,10 +359,10 @@ int ipcSendShareableHandle(ipcHandle *handle, socklen_t len = sizeof(cliaddr); // Construct client address to send this SHareable handle to - bzero(&cliaddr, sizeof(cliaddr)); + memset(&cliaddr, 0, sizeof(cliaddr)); cliaddr.sun_family = AF_UNIX; - char temp[10]; - sprintf(temp, "%u", process); + char temp[20]; + sprintf(temp, "%s%u", SOCK_FOLDER, process); strcpy(cliaddr.sun_path, temp); len = sizeof(cliaddr); diff --git a/Common/helper_multiprocess.h b/Common/helper_multiprocess.h index 5c760718..d0bdcc20 100644 --- a/Common/helper_multiprocess.h +++ b/Common/helper_multiprocess.h @@ -54,6 +54,13 @@ #endif #include +// Define "/tmp" as socket creating folder for QNX +#if defined(__QNX__) +#define SOCK_FOLDER "/tmp/" +#else +#define SOCK_FOLDER "" +#endif + typedef struct sharedMemoryInfo_st { void *addr; size_t size; diff --git a/README.md b/README.md index 6c45cca9..9af7258f 100644 --- a/README.md +++ b/README.md @@ -135,9 +135,22 @@ $ cmake -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DCMAKE_LIBRARY_PATH=/usr ### QNX -Note that in the current branch sample cross-compilation for QNX is not fully validated. This placeholder will be updated in the -near future with QNX cross-compilation instructions. In the meantime, if you want to cross-compile for QNX please check out one -of the previous tags prior to the CMake build system transition in 12.8. +The sample cross-compilation for QNX is supported since v13.0 CUDA Samples. 
An example build for Tegra Thor QNX platform might look like this: + +``` +$ mkdir build +$ cd build + +QNX_HOST=/path/to/qnx/host QNX_TARGET=/path/to/qnx/target cmake .. -DBUILD_TEGRA=True -DCMAKE_CUDA_COMPILER=/usr/local/cuda-safe-13.0/bin/nvcc -DCMAKE_TOOLCHAIN_FILE=../cmake/toolchains/toolchain-aarch64-qnx.cmake -DCMAKE_LIBRARY_PATH=/usr/local/cuda-safe-13.0/thor/targets/aarch64-qnx/lib/stubs/ -DCMAKE_INCLUDE_PATH=/usr/local/cuda-safe-13.0/thor/targets/aarch64-qnx/include/ +``` + +### Forward Compatibility + +To build samples with a new CUDA Toolkit (CUDA 13.0 or later) and UMD (version 580 or later) on top of an old KMD (version 550 or earlier), set `CMAKE_PREFIX_PATH` so that the new driver library is used. The command might look like this: + +``` +cmake -DCMAKE_PREFIX_PATH=/usr/local/cuda/lib64/stubs/ .. +``` ## Running All Samples as Tests diff --git a/Samples/0_Introduction/UnifiedMemoryStreams/CMakeLists.txt b/Samples/0_Introduction/UnifiedMemoryStreams/CMakeLists.txt index be358957..d92baee3 100644 --- a/Samples/0_Introduction/UnifiedMemoryStreams/CMakeLists.txt +++ b/Samples/0_Introduction/UnifiedMemoryStreams/CMakeLists.txt @@ -19,6 +19,12 @@ endif() # Include directories and libraries include_directories(../../../Common) +# This sample is not supported on QNX +if(CMAKE_SYSTEM_NAME STREQUAL "QNX") + message(STATUS "Will not build sample ${PROJECT_NAME} - not supported on QNX") + return() +endif() + # Source file if(CMAKE_GENERATOR MATCHES "Visual Studio") find_package(OpenMP REQUIRED C CXX) diff --git a/Samples/0_Introduction/matrixMulDrv/CMakeLists.txt b/Samples/0_Introduction/matrixMulDrv/CMakeLists.txt index aa2df62a..c777fbd6 100644 --- a/Samples/0_Introduction/matrixMulDrv/CMakeLists.txt +++ b/Samples/0_Introduction/matrixMulDrv/CMakeLists.txt @@ -46,6 +46,14 @@ foreach(arch ${CMAKE_CUDA_ARCHITECTURES}) list(APPEND GENCODE_FLAGS "-gencode=arch=compute_${arch},code=sm_${arch}") endforeach() +if(CMAKE_SYSTEM_NAME STREQUAL "QNX") + set(INCLUDES_LIST) + foreach(dir 
${CUDAToolkit_INCLUDE_DIRS}) + list(APPEND INCLUDES_LIST "-I${dir}") + endforeach() + string(JOIN " " INCLUDES "${INCLUDES_LIST}") +endif() + add_custom_command( OUTPUT ${CUDA_FATBIN_FILE} COMMAND ${CMAKE_CUDA_COMPILER} ${INCLUDES} ${ALL_CCFLAGS} -Wno-deprecated-gpu-targets ${GENCODE_FLAGS} -o ${CUDA_FATBIN_FILE} -fatbin ${CUDA_KERNEL_SOURCE} diff --git a/Samples/0_Introduction/matrixMulDynlinkJIT/CMakeLists.txt b/Samples/0_Introduction/matrixMulDynlinkJIT/CMakeLists.txt index 3f3322f4..3058893e 100644 --- a/Samples/0_Introduction/matrixMulDynlinkJIT/CMakeLists.txt +++ b/Samples/0_Introduction/matrixMulDynlinkJIT/CMakeLists.txt @@ -31,12 +31,17 @@ set_target_properties(matrixMulDynlinkJIT PROPERTIES CUDA_SEPARABLE_COMPILATION ON POSITION_INDEPENDENT_CODE OFF ) -set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -no-pie") + +# Only add -no-pie for GCC or Clang +if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang") + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -no-pie") +endif() + target_link_libraries(matrixMulDynlinkJIT PUBLIC CUDA::cudart CUDA::cuda_driver ) -if(UNIX) +if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux") target_link_libraries(matrixMulDynlinkJIT PUBLIC dl) endif() diff --git a/Samples/0_Introduction/matrixMul_nvrtc/CMakeLists.txt b/Samples/0_Introduction/matrixMul_nvrtc/CMakeLists.txt index 1e7f6618..48c4d32d 100644 --- a/Samples/0_Introduction/matrixMul_nvrtc/CMakeLists.txt +++ b/Samples/0_Introduction/matrixMul_nvrtc/CMakeLists.txt @@ -32,23 +32,26 @@ target_link_libraries(matrixMul_nvrtc PRIVATE CUDA::cuda_driver ) +# The primary directory of CUDAToolkit_INCLUDE_DIRS is the CUDA Toolkit's include directory for finding the header files. 
+list(GET CUDAToolkit_INCLUDE_DIRS 0 CUDA_INCLUDE_DIR) + # Copy clock_kernel.cu to the output directory add_custom_command(TARGET matrixMul_nvrtc POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different - ${CMAKE_CURRENT_SOURCE_DIR}/matrixMul_kernel.cu ${CUDAToolkit_BIN_DIR}/../include/cooperative_groups.h ${CMAKE_CURRENT_BINARY_DIR} + ${CMAKE_CURRENT_SOURCE_DIR}/matrixMul_kernel.cu ${CUDA_INCLUDE_DIR}/cooperative_groups.h ${CMAKE_CURRENT_BINARY_DIR} ) add_custom_command(TARGET matrixMul_nvrtc POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_directory - ${CUDAToolkit_BIN_DIR}/../include/cooperative_groups ${CMAKE_CURRENT_BINARY_DIR}/cooperative_groups + ${CUDA_INCLUDE_DIR}/cooperative_groups ${CMAKE_CURRENT_BINARY_DIR}/cooperative_groups ) add_custom_command(TARGET matrixMul_nvrtc POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_directory - ${CUDAToolkit_BIN_DIR}/../include/cccl/nv ${CMAKE_CURRENT_BINARY_DIR}/nv + ${CUDA_INCLUDE_DIR}/cccl/nv ${CMAKE_CURRENT_BINARY_DIR}/nv ) add_custom_command(TARGET matrixMul_nvrtc POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_directory - ${CUDAToolkit_BIN_DIR}/../include/cccl/cuda ${CMAKE_CURRENT_BINARY_DIR}/cuda + ${CUDA_INCLUDE_DIR}/cccl/cuda ${CMAKE_CURRENT_BINARY_DIR}/cuda ) diff --git a/Samples/0_Introduction/simpleAWBarrier/CMakeLists.txt b/Samples/0_Introduction/simpleAWBarrier/CMakeLists.txt index b21a485b..18f001e2 100644 --- a/Samples/0_Introduction/simpleAWBarrier/CMakeLists.txt +++ b/Samples/0_Introduction/simpleAWBarrier/CMakeLists.txt @@ -20,6 +20,12 @@ endif() # Include directories and libraries include_directories(../../../Common) +# This sample is not supported on QNX +if(CMAKE_SYSTEM_NAME STREQUAL "QNX") + message(STATUS "simpleAWBarrier is not supported on QNX") + return() +endif() + # Source file # Add target for simpleAWBarrier add_executable(simpleAWBarrier simpleAWBarrier.cu) diff --git a/Samples/0_Introduction/simpleDrvRuntime/CMakeLists.txt b/Samples/0_Introduction/simpleDrvRuntime/CMakeLists.txt index 
487eb8dd..0774b4d5 100644 --- a/Samples/0_Introduction/simpleDrvRuntime/CMakeLists.txt +++ b/Samples/0_Introduction/simpleDrvRuntime/CMakeLists.txt @@ -48,6 +48,14 @@ foreach(arch ${CMAKE_CUDA_ARCHITECTURES}) list(APPEND GENCODE_FLAGS "-gencode=arch=compute_${arch},code=sm_${arch}") endforeach() +if(CMAKE_SYSTEM_NAME STREQUAL "QNX") + set(INCLUDES_LIST) + foreach(dir ${CUDAToolkit_INCLUDE_DIRS}) + list(APPEND INCLUDES_LIST "-I${dir}") + endforeach() + string(JOIN " " INCLUDES "${INCLUDES_LIST}") +endif() + add_custom_command( OUTPUT ${CUDA_FATBIN_FILE} COMMAND ${CMAKE_CUDA_COMPILER} ${INCLUDES} ${ALL_CCFLAGS} -Wno-deprecated-gpu-targets ${GENCODE_FLAGS} -o ${CUDA_FATBIN_FILE} -fatbin ${CUDA_KERNEL_SOURCE} diff --git a/Samples/0_Introduction/simpleTextureDrv/CMakeLists.txt b/Samples/0_Introduction/simpleTextureDrv/CMakeLists.txt index ecd71b1d..19f7f079 100644 --- a/Samples/0_Introduction/simpleTextureDrv/CMakeLists.txt +++ b/Samples/0_Introduction/simpleTextureDrv/CMakeLists.txt @@ -45,6 +45,14 @@ foreach(arch ${CMAKE_CUDA_ARCHITECTURES}) list(APPEND GENCODE_FLAGS "-gencode=arch=compute_${arch},code=sm_${arch}") endforeach() +if(CMAKE_SYSTEM_NAME STREQUAL "QNX") + set(INCLUDES_LIST) + foreach(dir ${CUDAToolkit_INCLUDE_DIRS}) + list(APPEND INCLUDES_LIST "-I${dir}") + endforeach() + string(JOIN " " INCLUDES "${INCLUDES_LIST}") +endif() + add_custom_command( OUTPUT ${CUDA_FATBIN_FILE} COMMAND ${CMAKE_CUDA_COMPILER} ${INCLUDES} ${ALL_CCFLAGS} -Wno-deprecated-gpu-targets ${GENCODE_FLAGS} -o ${CUDA_FATBIN_FILE} -fatbin ${CUDA_KERNEL_SOURCE} diff --git a/Samples/0_Introduction/vectorAddDrv/CMakeLists.txt b/Samples/0_Introduction/vectorAddDrv/CMakeLists.txt index 664fce2f..0b6a3477 100644 --- a/Samples/0_Introduction/vectorAddDrv/CMakeLists.txt +++ b/Samples/0_Introduction/vectorAddDrv/CMakeLists.txt @@ -45,6 +45,14 @@ foreach(arch ${CMAKE_CUDA_ARCHITECTURES}) list(APPEND GENCODE_FLAGS "-gencode=arch=compute_${arch},code=sm_${arch}") endforeach() +if(CMAKE_SYSTEM_NAME 
STREQUAL "QNX") + set(INCLUDES_LIST) + foreach(dir ${CUDAToolkit_INCLUDE_DIRS}) + list(APPEND INCLUDES_LIST "-I${dir}") + endforeach() + string(JOIN " " INCLUDES "${INCLUDES_LIST}") +endif() + add_custom_command( OUTPUT ${CUDA_FATBIN_FILE} COMMAND ${CMAKE_CUDA_COMPILER} ${INCLUDES} ${ALL_CCFLAGS} -Wno-deprecated-gpu-targets ${GENCODE_FLAGS} -o ${CUDA_FATBIN_FILE} -fatbin ${CUDA_KERNEL_SOURCE} diff --git a/Samples/2_Concepts_and_Techniques/threadMigration/CMakeLists.txt b/Samples/2_Concepts_and_Techniques/threadMigration/CMakeLists.txt index ff7ec77c..cd04098a 100644 --- a/Samples/2_Concepts_and_Techniques/threadMigration/CMakeLists.txt +++ b/Samples/2_Concepts_and_Techniques/threadMigration/CMakeLists.txt @@ -37,7 +37,7 @@ target_link_libraries(threadMigration PUBLIC CUDA::cuda_driver ) -if(UNIX) +if(UNIX AND NOT ${CMAKE_SYSTEM_NAME} STREQUAL "QNX") target_link_libraries(threadMigration PUBLIC pthread) endif() @@ -50,6 +50,14 @@ foreach(arch ${CMAKE_CUDA_ARCHITECTURES}) list(APPEND GENCODE_FLAGS "-gencode=arch=compute_${arch},code=sm_${arch}") endforeach() +if(CMAKE_SYSTEM_NAME STREQUAL "QNX") + set(INCLUDES_LIST) + foreach(dir ${CUDAToolkit_INCLUDE_DIRS}) + list(APPEND INCLUDES_LIST "-I${dir}") + endforeach() + string(JOIN " " INCLUDES "${INCLUDES_LIST}") +endif() + add_custom_command( OUTPUT ${CUDA_FATBIN_FILE} COMMAND ${CMAKE_CUDA_COMPILER} ${INCLUDES} ${ALL_CCFLAGS} -Wno-deprecated-gpu-targets ${GENCODE_FLAGS} -o ${CUDA_FATBIN_FILE} -fatbin ${CUDA_KERNEL_SOURCE} diff --git a/Samples/3_CUDA_Features/bf16TensorCoreGemm/CMakeLists.txt b/Samples/3_CUDA_Features/bf16TensorCoreGemm/CMakeLists.txt index c395548b..3e949ec6 100644 --- a/Samples/3_CUDA_Features/bf16TensorCoreGemm/CMakeLists.txt +++ b/Samples/3_CUDA_Features/bf16TensorCoreGemm/CMakeLists.txt @@ -18,6 +18,12 @@ endif() # Include directories and libraries include_directories(../../../Common) +# This sample is not supported on QNX +if(CMAKE_SYSTEM_NAME STREQUAL "QNX") + message(STATUS "Will not build sample 
${PROJECT_NAME} - not supported on QNX") + return() +endif() + # Source file # Add target for bf16TensorCoreGemm add_executable(bf16TensorCoreGemm bf16TensorCoreGemm.cu) diff --git a/Samples/3_CUDA_Features/cdpAdvancedQuicksort/CMakeLists.txt b/Samples/3_CUDA_Features/cdpAdvancedQuicksort/CMakeLists.txt index 4d4784b0..edbd7698 100644 --- a/Samples/3_CUDA_Features/cdpAdvancedQuicksort/CMakeLists.txt +++ b/Samples/3_CUDA_Features/cdpAdvancedQuicksort/CMakeLists.txt @@ -9,8 +9,9 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) # The aarch64/sbsa_aarch64 CUDA toolkit are support on Tegra since 13.0, so need to check which version of the toolkit is installed -string(FIND "${CUDAToolkit_INCLUDE_DIRS}" "aarch64-linux" _aarch64_ctk) -if(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT _aarch64_ctk EQUAL -1) +string(FIND "${CUDAToolkit_INCLUDE_DIRS}" "aarch64-linux" _aarch64_linux_ctk) +string(FIND "${CUDAToolkit_INCLUDE_DIRS}" "aarch64-qnx" _aarch64_qnx_ctk) +if(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND (NOT _aarch64_linux_ctk EQUAL -1 OR NOT _aarch64_qnx_ctk EQUAL -1)) set(CMAKE_CUDA_ARCHITECTURES 87 110) else() set(CMAKE_CUDA_ARCHITECTURES 75 80 86 89 90 100 110 120) diff --git a/Samples/3_CUDA_Features/cdpBezierTessellation/CMakeLists.txt b/Samples/3_CUDA_Features/cdpBezierTessellation/CMakeLists.txt index c5eb4703..fec2f807 100644 --- a/Samples/3_CUDA_Features/cdpBezierTessellation/CMakeLists.txt +++ b/Samples/3_CUDA_Features/cdpBezierTessellation/CMakeLists.txt @@ -9,8 +9,9 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) # The aarch64/sbsa_aarch64 CUDA toolkit are support on Tegra since 13.0, so need to check which version of the toolkit is installed -string(FIND "${CUDAToolkit_INCLUDE_DIRS}" "aarch64-linux" _aarch64_ctk) -if(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT _aarch64_ctk EQUAL -1) +string(FIND "${CUDAToolkit_INCLUDE_DIRS}" "aarch64-linux" _aarch64_linux_ctk) +string(FIND 
"${CUDAToolkit_INCLUDE_DIRS}" "aarch64-qnx" _aarch64_qnx_ctk) +if(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND (NOT _aarch64_linux_ctk EQUAL -1 OR NOT _aarch64_qnx_ctk EQUAL -1)) set(CMAKE_CUDA_ARCHITECTURES 87 110) else() set(CMAKE_CUDA_ARCHITECTURES 75 80 86 89 90 100 110 120) diff --git a/Samples/3_CUDA_Features/cdpQuadtree/CMakeLists.txt b/Samples/3_CUDA_Features/cdpQuadtree/CMakeLists.txt index a3fd2eba..85fe0c8c 100644 --- a/Samples/3_CUDA_Features/cdpQuadtree/CMakeLists.txt +++ b/Samples/3_CUDA_Features/cdpQuadtree/CMakeLists.txt @@ -9,8 +9,9 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) # The aarch64/sbsa_aarch64 CUDA toolkit are support on Tegra since 13.0, so need to check which version of the toolkit is installed -string(FIND "${CUDAToolkit_INCLUDE_DIRS}" "aarch64-linux" _aarch64_ctk) -if(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT _aarch64_ctk EQUAL -1) +string(FIND "${CUDAToolkit_INCLUDE_DIRS}" "aarch64-linux" _aarch64_linux_ctk) +string(FIND "${CUDAToolkit_INCLUDE_DIRS}" "aarch64-qnx" _aarch64_qnx_ctk) +if(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND (NOT _aarch64_linux_ctk EQUAL -1 OR NOT _aarch64_qnx_ctk EQUAL -1)) set(CMAKE_CUDA_ARCHITECTURES 87 110) else() set(CMAKE_CUDA_ARCHITECTURES 75 80 86 89 90 100 110 120) diff --git a/Samples/3_CUDA_Features/cdpSimplePrint/CMakeLists.txt b/Samples/3_CUDA_Features/cdpSimplePrint/CMakeLists.txt index e58ee19a..726a555d 100644 --- a/Samples/3_CUDA_Features/cdpSimplePrint/CMakeLists.txt +++ b/Samples/3_CUDA_Features/cdpSimplePrint/CMakeLists.txt @@ -9,8 +9,9 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) # The aarch64/sbsa_aarch64 CUDA toolkit are support on Tegra since 13.0, so need to check which version of the toolkit is installed -string(FIND "${CUDAToolkit_INCLUDE_DIRS}" "aarch64-linux" _aarch64_ctk) -if(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT _aarch64_ctk EQUAL -1) +string(FIND "${CUDAToolkit_INCLUDE_DIRS}" "aarch64-linux" 
_aarch64_linux_ctk) +string(FIND "${CUDAToolkit_INCLUDE_DIRS}" "aarch64-qnx" _aarch64_qnx_ctk) +if(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND (NOT _aarch64_linux_ctk EQUAL -1 OR NOT _aarch64_qnx_ctk EQUAL -1)) set(CMAKE_CUDA_ARCHITECTURES 87 110) else() set(CMAKE_CUDA_ARCHITECTURES 75 80 86 89 90 100 110 120) diff --git a/Samples/3_CUDA_Features/cdpSimpleQuicksort/CMakeLists.txt b/Samples/3_CUDA_Features/cdpSimpleQuicksort/CMakeLists.txt index bbb61864..e6eabe4e 100644 --- a/Samples/3_CUDA_Features/cdpSimpleQuicksort/CMakeLists.txt +++ b/Samples/3_CUDA_Features/cdpSimpleQuicksort/CMakeLists.txt @@ -9,8 +9,9 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) # The aarch64/sbsa_aarch64 CUDA toolkit are support on Tegra since 13.0, so need to check which version of the toolkit is installed -string(FIND "${CUDAToolkit_INCLUDE_DIRS}" "aarch64-linux" _aarch64_ctk) -if(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT _aarch64_ctk EQUAL -1) +string(FIND "${CUDAToolkit_INCLUDE_DIRS}" "aarch64-linux" _aarch64_linux_ctk) +string(FIND "${CUDAToolkit_INCLUDE_DIRS}" "aarch64-qnx" _aarch64_qnx_ctk) +if(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND (NOT _aarch64_linux_ctk EQUAL -1 OR NOT _aarch64_qnx_ctk EQUAL -1)) set(CMAKE_CUDA_ARCHITECTURES 87 110) else() set(CMAKE_CUDA_ARCHITECTURES 75 80 86 89 90 100 110 120) diff --git a/Samples/3_CUDA_Features/dmmaTensorCoreGemm/CMakeLists.txt b/Samples/3_CUDA_Features/dmmaTensorCoreGemm/CMakeLists.txt index a48c1c6b..cb94a68b 100644 --- a/Samples/3_CUDA_Features/dmmaTensorCoreGemm/CMakeLists.txt +++ b/Samples/3_CUDA_Features/dmmaTensorCoreGemm/CMakeLists.txt @@ -17,6 +17,12 @@ else() set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -lineinfo") # add line information to all builds for debug tools (exclusive to -G option) endif() +# This sample is not supported on QNX +if(CMAKE_SYSTEM_NAME STREQUAL "QNX") + message(STATUS "Will not build sample ${PROJECT_NAME} - not supported on QNX") + return() +endif() + # Include 
directories and libraries include_directories(../../../Common) diff --git a/Samples/3_CUDA_Features/globalToShmemAsyncCopy/CMakeLists.txt b/Samples/3_CUDA_Features/globalToShmemAsyncCopy/CMakeLists.txt index e2a790ed..caf971b0 100644 --- a/Samples/3_CUDA_Features/globalToShmemAsyncCopy/CMakeLists.txt +++ b/Samples/3_CUDA_Features/globalToShmemAsyncCopy/CMakeLists.txt @@ -20,6 +20,12 @@ endif() # Include directories and libraries include_directories(../../../Common) +# This sample is not supported on QNX +if(CMAKE_SYSTEM_NAME STREQUAL "QNX") + message(STATUS "Will not build sample ${PROJECT_NAME} - not supported on QNX") + return() +endif() + # Source file # Add target for globalToShmemAsyncCopy add_executable(globalToShmemAsyncCopy globalToShmemAsyncCopy.cu) diff --git a/Samples/3_CUDA_Features/memMapIPCDrv/CMakeLists.txt b/Samples/3_CUDA_Features/memMapIPCDrv/CMakeLists.txt index 5e6c54bd..b57e9187 100644 --- a/Samples/3_CUDA_Features/memMapIPCDrv/CMakeLists.txt +++ b/Samples/3_CUDA_Features/memMapIPCDrv/CMakeLists.txt @@ -37,13 +37,25 @@ target_link_libraries(memMapIPCDrv PUBLIC CUDA::cuda_driver ) -if(UNIX) +if(UNIX AND NOT ${CMAKE_SYSTEM_NAME} STREQUAL "QNX") target_link_libraries(memMapIPCDrv PUBLIC rt) endif() +if(${CMAKE_SYSTEM_NAME} STREQUAL "QNX") + target_link_libraries(memMapIPCDrv PUBLIC socket) +endif() + set(CUDA_PTX_FILE "${CMAKE_CURRENT_BINARY_DIR}/memMapIpc_kernel64.ptx") set(CUDA_KERNEL_SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/memMapIpc_kernel.cu") +if(CMAKE_SYSTEM_NAME STREQUAL "QNX") + set(INCLUDES_LIST) + foreach(dir ${CUDAToolkit_INCLUDE_DIRS}) + list(APPEND INCLUDES_LIST "-I${dir}") + endforeach() + string(JOIN " " INCLUDES "${INCLUDES_LIST}") +endif() + add_custom_command( OUTPUT ${CUDA_PTX_FILE} COMMAND ${CMAKE_CUDA_COMPILER} ${INCLUDES} ${ALL_CCFLAGS} -Wno-deprecated-gpu-targets -o ${CUDA_PTX_FILE} -ptx ${CUDA_KERNEL_SOURCE} diff --git a/Samples/3_CUDA_Features/ptxjit/CMakeLists.txt b/Samples/3_CUDA_Features/ptxjit/CMakeLists.txt index 
ea1de621..575949b0 100644 --- a/Samples/3_CUDA_Features/ptxjit/CMakeLists.txt +++ b/Samples/3_CUDA_Features/ptxjit/CMakeLists.txt @@ -41,6 +41,14 @@ target_link_libraries(ptxjit PUBLIC set(CUDA_PTX_FILE "${CMAKE_CURRENT_BINARY_DIR}/ptxjit_kernel64.ptx") set(CUDA_KERNEL_SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/ptxjit_kernel.cu") +if(CMAKE_SYSTEM_NAME STREQUAL "QNX") + set(INCLUDES_LIST) + foreach(dir ${CUDAToolkit_INCLUDE_DIRS}) + list(APPEND INCLUDES_LIST "-I${dir}") + endforeach() + string(JOIN " " INCLUDES "${INCLUDES_LIST}") +endif() + add_custom_command( OUTPUT ${CUDA_PTX_FILE} COMMAND ${CMAKE_CUDA_COMPILER} ${INCLUDES} ${ALL_CCFLAGS} -Wno-deprecated-gpu-targets -o ${CUDA_PTX_FILE} -ptx ${CUDA_KERNEL_SOURCE} diff --git a/Samples/3_CUDA_Features/tf32TensorCoreGemm/CMakeLists.txt b/Samples/3_CUDA_Features/tf32TensorCoreGemm/CMakeLists.txt index 139b63a7..084d52f5 100644 --- a/Samples/3_CUDA_Features/tf32TensorCoreGemm/CMakeLists.txt +++ b/Samples/3_CUDA_Features/tf32TensorCoreGemm/CMakeLists.txt @@ -20,6 +20,12 @@ endif() # Include directories and libraries include_directories(../../../Common) +# This sample is not supported on QNX +if(CMAKE_SYSTEM_NAME STREQUAL "QNX") + message(STATUS "Will not build sample ${PROJECT_NAME} - not supported on QNX") + return() +endif() + # Source file # Add target for tf32TensorCoreGemm add_executable(tf32TensorCoreGemm tf32TensorCoreGemm.cu) diff --git a/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/CMakeLists.txt b/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/CMakeLists.txt index bcb09bd4..982542e7 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/CMakeLists.txt +++ b/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/CMakeLists.txt @@ -4,6 +4,11 @@ list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../../../cmake/Module project(conjugateGradientCudaGraphs LANGUAGES CUDA CXX) +# Disable response file for libraries on QNX as qcc does not support lib paths with double quotes 
+if(CMAKE_SYSTEM_NAME STREQUAL "QNX") + set(CMAKE_CUDA_USE_RESPONSE_FILE_FOR_LIBRARIES OFF) +endif() + find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/CMakeLists.txt b/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/CMakeLists.txt index 924e640f..3d67e92c 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/CMakeLists.txt +++ b/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/CMakeLists.txt @@ -21,6 +21,12 @@ endif() # Include directories and libraries include_directories(../../../Common) +# This sample is not supported on QNX +if(CMAKE_SYSTEM_NAME STREQUAL "QNX") + message(STATUS "Will not build sample ${PROJECT_NAME} - not supported on QNX") + return() +endif() + # Source file # Add target for conjugateGradientMultiBlockCG add_executable(conjugateGradientMultiBlockCG conjugateGradientMultiBlockCG.cu) diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/CMakeLists.txt b/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/CMakeLists.txt index 3d02538d..ccf76ee7 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/CMakeLists.txt +++ b/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/CMakeLists.txt @@ -20,6 +20,12 @@ endif() # Include directories and libraries include_directories(../../../Common) +# This sample is not supported on QNX +if(CMAKE_SYSTEM_NAME STREQUAL "QNX") + message(STATUS "Will not build sample ${PROJECT_NAME} - not supported on QNX") + return() +endif() + # Source file # Add target for conjugateGradientMultiDeviceCG add_executable(conjugateGradientMultiDeviceCG conjugateGradientMultiDeviceCG.cu) diff --git a/Samples/4_CUDA_Libraries/jitLto/CMakeLists.txt b/Samples/4_CUDA_Libraries/jitLto/CMakeLists.txt index 42520c7d..23c33291 100644 --- a/Samples/4_CUDA_Libraries/jitLto/CMakeLists.txt +++ b/Samples/4_CUDA_Libraries/jitLto/CMakeLists.txt @@ -19,6 +19,12 @@ endif() # Include directories 
and libraries include_directories(../../../Common) +# This sample is not supported on QNX +if(CMAKE_SYSTEM_NAME STREQUAL "QNX") + message(STATUS "Will not build sample ${PROJECT_NAME} - not supported on QNX") + return() +endif() + # Source file # Add target for jitLto add_executable(jitLto jitLto.cpp) diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT/CMakeLists.txt b/Samples/4_CUDA_Libraries/simpleCUFFT/CMakeLists.txt index dc0ed28e..5373206c 100644 --- a/Samples/4_CUDA_Libraries/simpleCUFFT/CMakeLists.txt +++ b/Samples/4_CUDA_Libraries/simpleCUFFT/CMakeLists.txt @@ -4,6 +4,11 @@ list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../../../cmake/Module project(simpleCUFFT LANGUAGES CUDA) +# Disable response file for libraries on QNX as qcc does not support lib paths with double quotes +if(CMAKE_SYSTEM_NAME STREQUAL "QNX") + set(CMAKE_CUDA_USE_RESPONSE_FILE_FOR_LIBRARIES OFF) +endif() + find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/CMakeLists.txt b/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/CMakeLists.txt index 25deba62..64783b2f 100644 --- a/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/CMakeLists.txt +++ b/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/CMakeLists.txt @@ -4,6 +4,11 @@ list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../../../cmake/Module project(simpleCUFFT_2d_MGPU LANGUAGES CUDA) +# Disable response file for libraries on QNX as qcc does not support lib paths with double quotes +if(CMAKE_SYSTEM_NAME STREQUAL "QNX") + set(CMAKE_CUDA_USE_RESPONSE_FILE_FOR_LIBRARIES OFF) +endif() + find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/CMakeLists.txt b/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/CMakeLists.txt index 98c9b4bf..34aa1146 100644 --- a/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/CMakeLists.txt +++ b/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/CMakeLists.txt @@ -4,6 +4,11 
@@ list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../../../cmake/Module project(simpleCUFFT_MGPU LANGUAGES CUDA) +# Disable response file for libraries on QNX as qcc does not support lib paths with double quotes +if(CMAKE_SYSTEM_NAME STREQUAL "QNX") + set(CMAKE_CUDA_USE_RESPONSE_FILE_FOR_LIBRARIES OFF) +endif() + find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) diff --git a/Samples/4_CUDA_Libraries/watershedSegmentationNPP/CMakeLists.txt b/Samples/4_CUDA_Libraries/watershedSegmentationNPP/CMakeLists.txt index cd38056a..d9116f36 100644 --- a/Samples/4_CUDA_Libraries/watershedSegmentationNPP/CMakeLists.txt +++ b/Samples/4_CUDA_Libraries/watershedSegmentationNPP/CMakeLists.txt @@ -19,6 +19,12 @@ endif() # Include directories and libraries include_directories(../../../Common) +# This sample is not supported on QNX +if(CMAKE_SYSTEM_NAME STREQUAL "QNX") + message(STATUS "Will not build sample ${PROJECT_NAME} - not supported on QNX") + return() +endif() + # Source file # Add target for watershedSegmentationNPP add_executable(watershedSegmentationNPP watershedSegmentationNPP.cpp) diff --git a/Samples/7_libNVVM/README.md b/Samples/7_libNVVM/README.md index 9aa39ed0..efb957ec 100644 --- a/Samples/7_libNVVM/README.md +++ b/Samples/7_libNVVM/README.md @@ -58,6 +58,12 @@ Alternatively, we provide a Makefile that will automatically build these samples on Linux as part of the toplevel cuda-samples build. Windows users should build manually via utils/built.bat or Visual Studio's CMake integration. 
+To build and run the samples with a new CUDA Toolkit (CUDA 13.0 or later) and +UMD (version 580 or later) on top of an old KMD (version 550 or earlier), +update the CMake invocation in utils/build.sh or build.bat to pass +`CMAKE_PREFIX_PATH` pointing at the stubs directory, like this: +"-DCMAKE_PREFIX_PATH=/usr/local/cuda/lib64/stubs/" + A Note About the cuda-c-linking Sample -------------------------------------- diff --git a/cmake/toolchains/toolchain-aarch64-qnx.cmake b/cmake/toolchains/toolchain-aarch64-qnx.cmake index 98e6f3d0..df9839b8 100644 --- a/cmake/toolchains/toolchain-aarch64-qnx.cmake +++ b/cmake/toolchains/toolchain-aarch64-qnx.cmake @@ -1,29 +1,13 @@ -#============================================================================== -# Toolchain file for cross-compiling to aarch64 QNX -#============================================================================== - -# Cross-compiling, so tell CMake that we are not building for the host system set(CMAKE_SYSTEM_NAME QNX) - -# Target processor architecture set(CMAKE_SYSTEM_PROCESSOR aarch64) -#------------------------------------------------------------------------------ -# QNX host and target come from environment -# Adjust these or hard-code paths as needed: -# -# set(QNX_HOST "/path/to/qnx/host") # e.g. /qnx/qnx710/host/linux/x86_64 -# set(QNX_TARGET "/path/to/qnx/target") # e.g. /qnx/qnx710/target/qnx7 -# -# You can also pass them on the cmake command line: -# cmake -D QNX_HOST=/path/to/qnx/host \ -# -D QNX_TARGET=/path/to/qnx/target \ -# -DCMAKE_TOOLCHAIN_FILE=toolchain-aarch64-qnx.cmake .. 
-#------------------------------------------------------------------------------ +# Need to set the QNX_HOST and QNX_TARGET environment variables +set(QNX_HOST $ENV{QNX_HOST}) +set(QNX_TARGET $ENV{QNX_TARGET}) + +message(STATUS "QNX_HOST = ${QNX_HOST}") +message(STATUS "QNX_TARGET = ${QNX_TARGET}") -#---------------------------------------------------------------------------- -# C/C++ Compilers from QNX -#---------------------------------------------------------------------------- find_program(QNX_QCC NAMES qcc PATHS "${QNX_HOST}/usr/bin") find_program(QNX_QPLUS NAMES q++ PATHS "${QNX_HOST}/usr/bin") @@ -31,50 +15,23 @@ if(NOT QNX_QCC OR NOT QNX_QPLUS) message(FATAL_ERROR "Could not find qcc or q++ in QNX_HOST=${QNX_HOST}/usr/bin") endif() -set(CMAKE_C_COMPILER "${QNX_QCC}") -set(CMAKE_CXX_COMPILER "${QNX_QPLUS}") +# Specify the cross-compilers +set(CMAKE_C_COMPILER ${QNX_QCC}) +set(CMAKE_CXX_COMPILER ${QNX_QPLUS}) -#---------------------------------------------------------------------------- -# Sysroot (if you want CMake to know the default sysroot) -#---------------------------------------------------------------------------- -# This is optional, but convenient if the QNX headers/libraries must be found: -#---------------------------------------------------------------------------- -if(DEFINED QNX_TARGET) - set(CMAKE_SYSROOT "${QNX_TARGET}") -endif() +set(CMAKE_C_COMPILER_TARGET aarch64) +set(CMAKE_CXX_COMPILER_TARGET aarch64) -#---------------------------------------------------------------------------- -# Additional preprocessor definitions & include paths -#---------------------------------------------------------------------------- -add_compile_options( - -D_QNX_SOURCE - -DWIN_INTERFACE_CUSTOM -) +# Set compiler flags +set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER} CACHE STRING "" FORCE) +set(CMAKE_CUDA_COMPILER_ID_TEST_FLAGS_FIRST "-nodlink -L${CUDA_ROOT}/lib64 -L${CUDA_ROOT}/lib -I${CUDA_ROOT}/include") -# Add an include path to 
/usr/include/aarch64-qnx-gnu: -include_directories("/usr/include/aarch64-qnx-gnu") +set(CMAKE_C_FLAGS " \"-V${__qnx_gcc_ver},gcc_ntoaarch64le\"") +set(CMAKE_CXX_FLAGS " \"-V${__qnx_gcc_ver},gcc_ntoaarch64le\"") +set(CMAKE_CUDA_FLAGS " --qpp-config=${__qnx_gcc_ver},gcc_ntoaarch64le") +set(AUTOMAGIC_NVCC_FLAGS --qpp-config=${__qnx_gcc_ver},gcc_ntoaarch64le CACHE STRING "automagic feature detection flags for cross build") +add_link_options("-V${__qnx_gcc_ver},gcc_ntoaarch64le") -#---------------------------------------------------------------------------- -# Linker flags -#---------------------------------------------------------------------------- -set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -L/usr/lib/aarch64-qnx-gnu") - -# Because the Makefile also adds -Wl,-rpath-link,/usr/lib/aarch64-qnx-gnu: -set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-rpath-link,/usr/lib/aarch64-qnx-gnu") - -# If you have a “target filesystem” (TARGET_FS) to link with: -# -L$(TARGET_FS)/usr/lib -# -L$(TARGET_FS)/usr/libnvidia -# etc., you can optionally extend the link flags. For example: -# -# if(DEFINED TARGET_FS) -# set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} \ -# -L${TARGET_FS}/usr/lib -Wl,-rpath-link,${TARGET_FS}/usr/lib \ -# -L${TARGET_FS}/usr/libnvidia -Wl,-rpath-link,${TARGET_FS}/usr/libnvidia") -# include_directories("${TARGET_FS}/../include") -# endif() - -# If you need to link additional libraries, e.g. -lslog2 under certain conditions: -# list(APPEND EXTRA_LIBS "slog2") -# ... -#---------------------------------------------------------------------------- +set(CROSS_COMPILE_FOR_QNX ON CACHE BOOL "Cross compiling for QNX platforms") +string(APPEND CMAKE_CXX_FLAGS " -D_QNX_SOURCE") +string(APPEND CMAKE_CUDA_FLAGS " -D_QNX_SOURCE")