From 49307463b5f91631a06760432200de3ec2ab6338 Mon Sep 17 00:00:00 2001 From: shawnz Date: Thu, 29 May 2025 16:57:28 +0800 Subject: [PATCH 01/12] Bug 5133216: Add QNX tooltrain and cross build support --- .../UnifiedMemoryStreams/CMakeLists.txt | 6 ++ .../matrixMulDrv/CMakeLists.txt | 8 ++ .../matrixMulDynlinkJIT/CMakeLists.txt | 2 +- .../simpleDrvRuntime/CMakeLists.txt | 8 ++ .../simpleTextureDrv/CMakeLists.txt | 8 ++ .../vectorAddDrv/CMakeLists.txt | 8 ++ .../threadMigration/CMakeLists.txt | 8 ++ .../bf16TensorCoreGemm/CMakeLists.txt | 6 ++ .../dmmaTensorCoreGemm/CMakeLists.txt | 6 ++ .../globalToShmemAsyncCopy/CMakeLists.txt | 6 ++ .../tf32TensorCoreGemm/CMakeLists.txt | 6 ++ .../CMakeLists.txt | 6 ++ .../CMakeLists.txt | 6 ++ .../4_CUDA_Libraries/jitLto/CMakeLists.txt | 6 ++ .../watershedSegmentationNPP/CMakeLists.txt | 6 ++ cmake/toolchains/toolchain-aarch64-qnx.cmake | 89 ++++--------------- 16 files changed, 113 insertions(+), 72 deletions(-) diff --git a/Samples/0_Introduction/UnifiedMemoryStreams/CMakeLists.txt b/Samples/0_Introduction/UnifiedMemoryStreams/CMakeLists.txt index be358957..d92baee3 100644 --- a/Samples/0_Introduction/UnifiedMemoryStreams/CMakeLists.txt +++ b/Samples/0_Introduction/UnifiedMemoryStreams/CMakeLists.txt @@ -19,6 +19,12 @@ endif() # Include directories and libraries include_directories(../../../Common) +# This sample is not supported on QNX +if(CMAKE_SYSTEM_NAME STREQUAL "QNX") + message(STATUS "Will not build sample ${PROJECT_NAME} - not supported on QNX") + return() +endif() + # Source file if(CMAKE_GENERATOR MATCHES "Visual Studio") find_package(OpenMP REQUIRED C CXX) diff --git a/Samples/0_Introduction/matrixMulDrv/CMakeLists.txt b/Samples/0_Introduction/matrixMulDrv/CMakeLists.txt index aa2df62a..5cdb581c 100644 --- a/Samples/0_Introduction/matrixMulDrv/CMakeLists.txt +++ b/Samples/0_Introduction/matrixMulDrv/CMakeLists.txt @@ -46,6 +46,14 @@ foreach(arch ${CMAKE_CUDA_ARCHITECTURES}) list(APPEND GENCODE_FLAGS 
"-gencode=arch=compute_${arch},code=sm_${arch}") endforeach() +if(CMAKE_SYSTEM_NAME STREQUAL "QNX") + set(INCLUDES_LIST) + foreach(dir ${CUDAToolkit_INCLUDE_DIRS}) + list(APPEND INCLUDES_LIST "-I${dir}") + endforeach() + string(JOIN " " INCLUDES "${INCLUDES_LIST}") +endif() + add_custom_command( OUTPUT ${CUDA_FATBIN_FILE} COMMAND ${CMAKE_CUDA_COMPILER} ${INCLUDES} ${ALL_CCFLAGS} -Wno-deprecated-gpu-targets ${GENCODE_FLAGS} -o ${CUDA_FATBIN_FILE} -fatbin ${CUDA_KERNEL_SOURCE} diff --git a/Samples/0_Introduction/matrixMulDynlinkJIT/CMakeLists.txt b/Samples/0_Introduction/matrixMulDynlinkJIT/CMakeLists.txt index 3f3322f4..70296afc 100644 --- a/Samples/0_Introduction/matrixMulDynlinkJIT/CMakeLists.txt +++ b/Samples/0_Introduction/matrixMulDynlinkJIT/CMakeLists.txt @@ -37,6 +37,6 @@ target_link_libraries(matrixMulDynlinkJIT PUBLIC CUDA::cuda_driver ) -if(UNIX) +if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux") target_link_libraries(matrixMulDynlinkJIT PUBLIC dl) endif() diff --git a/Samples/0_Introduction/simpleDrvRuntime/CMakeLists.txt b/Samples/0_Introduction/simpleDrvRuntime/CMakeLists.txt index 487eb8dd..04684da2 100644 --- a/Samples/0_Introduction/simpleDrvRuntime/CMakeLists.txt +++ b/Samples/0_Introduction/simpleDrvRuntime/CMakeLists.txt @@ -48,6 +48,14 @@ foreach(arch ${CMAKE_CUDA_ARCHITECTURES}) list(APPEND GENCODE_FLAGS "-gencode=arch=compute_${arch},code=sm_${arch}") endforeach() +if(CMAKE_SYSTEM_NAME STREQUAL "QNX") + set(INCLUDES_LIST) + foreach(dir ${CUDAToolkit_INCLUDE_DIRS}) + list(APPEND INCLUDES_LIST "-I${dir}") + endforeach() + string(JOIN " " INCLUDES "${INCLUDES_LIST}") +endif() + add_custom_command( OUTPUT ${CUDA_FATBIN_FILE} COMMAND ${CMAKE_CUDA_COMPILER} ${INCLUDES} ${ALL_CCFLAGS} -Wno-deprecated-gpu-targets ${GENCODE_FLAGS} -o ${CUDA_FATBIN_FILE} -fatbin ${CUDA_KERNEL_SOURCE} diff --git a/Samples/0_Introduction/simpleTextureDrv/CMakeLists.txt b/Samples/0_Introduction/simpleTextureDrv/CMakeLists.txt index ecd71b1d..befbfd05 100644 --- 
a/Samples/0_Introduction/simpleTextureDrv/CMakeLists.txt +++ b/Samples/0_Introduction/simpleTextureDrv/CMakeLists.txt @@ -45,6 +45,14 @@ foreach(arch ${CMAKE_CUDA_ARCHITECTURES}) list(APPEND GENCODE_FLAGS "-gencode=arch=compute_${arch},code=sm_${arch}") endforeach() +if(CMAKE_SYSTEM_NAME STREQUAL "QNX") + set(INCLUDES_LIST) + foreach(dir ${CUDAToolkit_INCLUDE_DIRS}) + list(APPEND INCLUDES_LIST "-I${dir}") + endforeach() + string(JOIN " " INCLUDES "${INCLUDES_LIST}") +endif() + add_custom_command( OUTPUT ${CUDA_FATBIN_FILE} COMMAND ${CMAKE_CUDA_COMPILER} ${INCLUDES} ${ALL_CCFLAGS} -Wno-deprecated-gpu-targets ${GENCODE_FLAGS} -o ${CUDA_FATBIN_FILE} -fatbin ${CUDA_KERNEL_SOURCE} diff --git a/Samples/0_Introduction/vectorAddDrv/CMakeLists.txt b/Samples/0_Introduction/vectorAddDrv/CMakeLists.txt index 664fce2f..abbef92e 100644 --- a/Samples/0_Introduction/vectorAddDrv/CMakeLists.txt +++ b/Samples/0_Introduction/vectorAddDrv/CMakeLists.txt @@ -45,6 +45,14 @@ foreach(arch ${CMAKE_CUDA_ARCHITECTURES}) list(APPEND GENCODE_FLAGS "-gencode=arch=compute_${arch},code=sm_${arch}") endforeach() +if(CMAKE_SYSTEM_NAME STREQUAL "QNX") + set(INCLUDES_LIST) + foreach(dir ${CUDAToolkit_INCLUDE_DIRS}) + list(APPEND INCLUDES_LIST "-I${dir}") + endforeach() + string(JOIN " " INCLUDES "${INCLUDES_LIST}") +endif() + add_custom_command( OUTPUT ${CUDA_FATBIN_FILE} COMMAND ${CMAKE_CUDA_COMPILER} ${INCLUDES} ${ALL_CCFLAGS} -Wno-deprecated-gpu-targets ${GENCODE_FLAGS} -o ${CUDA_FATBIN_FILE} -fatbin ${CUDA_KERNEL_SOURCE} diff --git a/Samples/2_Concepts_and_Techniques/threadMigration/CMakeLists.txt b/Samples/2_Concepts_and_Techniques/threadMigration/CMakeLists.txt index ff7ec77c..9603b6c6 100644 --- a/Samples/2_Concepts_and_Techniques/threadMigration/CMakeLists.txt +++ b/Samples/2_Concepts_and_Techniques/threadMigration/CMakeLists.txt @@ -50,6 +50,14 @@ foreach(arch ${CMAKE_CUDA_ARCHITECTURES}) list(APPEND GENCODE_FLAGS "-gencode=arch=compute_${arch},code=sm_${arch}") endforeach() 
+if(CMAKE_SYSTEM_NAME STREQUAL "QNX") + set(INCLUDES_LIST) + foreach(dir ${CUDAToolkit_INCLUDE_DIRS}) + list(APPEND INCLUDES_LIST "-I${dir}") + endforeach() + string(JOIN " " INCLUDES "${INCLUDES_LIST}") +endif() + add_custom_command( OUTPUT ${CUDA_FATBIN_FILE} COMMAND ${CMAKE_CUDA_COMPILER} ${INCLUDES} ${ALL_CCFLAGS} -Wno-deprecated-gpu-targets ${GENCODE_FLAGS} -o ${CUDA_FATBIN_FILE} -fatbin ${CUDA_KERNEL_SOURCE} diff --git a/Samples/3_CUDA_Features/bf16TensorCoreGemm/CMakeLists.txt b/Samples/3_CUDA_Features/bf16TensorCoreGemm/CMakeLists.txt index c395548b..3e949ec6 100644 --- a/Samples/3_CUDA_Features/bf16TensorCoreGemm/CMakeLists.txt +++ b/Samples/3_CUDA_Features/bf16TensorCoreGemm/CMakeLists.txt @@ -18,6 +18,12 @@ endif() # Include directories and libraries include_directories(../../../Common) +# This sample is not supported on QNX +if(CMAKE_SYSTEM_NAME STREQUAL "QNX") + message(STATUS "Will not build sample ${PROJECT_NAME} - not supported on QNX") + return() +endif() + # Source file # Add target for bf16TensorCoreGemm add_executable(bf16TensorCoreGemm bf16TensorCoreGemm.cu) diff --git a/Samples/3_CUDA_Features/dmmaTensorCoreGemm/CMakeLists.txt b/Samples/3_CUDA_Features/dmmaTensorCoreGemm/CMakeLists.txt index a48c1c6b..cb94a68b 100644 --- a/Samples/3_CUDA_Features/dmmaTensorCoreGemm/CMakeLists.txt +++ b/Samples/3_CUDA_Features/dmmaTensorCoreGemm/CMakeLists.txt @@ -17,6 +17,12 @@ else() set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -lineinfo") # add line information to all builds for debug tools (exclusive to -G option) endif() +# This sample is not supported on QNX +if(CMAKE_SYSTEM_NAME STREQUAL "QNX") + message(STATUS "Will not build sample ${PROJECT_NAME} - not supported on QNX") + return() +endif() + # Include directories and libraries include_directories(../../../Common) diff --git a/Samples/3_CUDA_Features/globalToShmemAsyncCopy/CMakeLists.txt b/Samples/3_CUDA_Features/globalToShmemAsyncCopy/CMakeLists.txt index e2a790ed..caf971b0 100644 --- 
a/Samples/3_CUDA_Features/globalToShmemAsyncCopy/CMakeLists.txt +++ b/Samples/3_CUDA_Features/globalToShmemAsyncCopy/CMakeLists.txt @@ -20,6 +20,12 @@ endif() # Include directories and libraries include_directories(../../../Common) +# This sample is not supported on QNX +if(CMAKE_SYSTEM_NAME STREQUAL "QNX") + message(STATUS "Will not build sample ${PROJECT_NAME} - not supported on QNX") + return() +endif() + # Source file # Add target for globalToShmemAsyncCopy add_executable(globalToShmemAsyncCopy globalToShmemAsyncCopy.cu) diff --git a/Samples/3_CUDA_Features/tf32TensorCoreGemm/CMakeLists.txt b/Samples/3_CUDA_Features/tf32TensorCoreGemm/CMakeLists.txt index 139b63a7..084d52f5 100644 --- a/Samples/3_CUDA_Features/tf32TensorCoreGemm/CMakeLists.txt +++ b/Samples/3_CUDA_Features/tf32TensorCoreGemm/CMakeLists.txt @@ -20,6 +20,12 @@ endif() # Include directories and libraries include_directories(../../../Common) +# This sample is not supported on QNX +if(CMAKE_SYSTEM_NAME STREQUAL "QNX") + message(STATUS "Will not build sample ${PROJECT_NAME} - not supported on QNX") + return() +endif() + # Source file # Add target for tf32TensorCoreGemm add_executable(tf32TensorCoreGemm tf32TensorCoreGemm.cu) diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/CMakeLists.txt b/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/CMakeLists.txt index 924e640f..3d67e92c 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/CMakeLists.txt +++ b/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/CMakeLists.txt @@ -21,6 +21,12 @@ endif() # Include directories and libraries include_directories(../../../Common) +# This sample is not supported on QNX +if(CMAKE_SYSTEM_NAME STREQUAL "QNX") + message(STATUS "Will not build sample ${PROJECT_NAME} - not supported on QNX") + return() +endif() + # Source file # Add target for conjugateGradientMultiBlockCG add_executable(conjugateGradientMultiBlockCG conjugateGradientMultiBlockCG.cu) diff --git 
a/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/CMakeLists.txt b/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/CMakeLists.txt index 3d02538d..ccf76ee7 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/CMakeLists.txt +++ b/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/CMakeLists.txt @@ -20,6 +20,12 @@ endif() # Include directories and libraries include_directories(../../../Common) +# This sample is not supported on QNX +if(CMAKE_SYSTEM_NAME STREQUAL "QNX") + message(STATUS "Will not build sample ${PROJECT_NAME} - not supported on QNX") + return() +endif() + # Source file # Add target for conjugateGradientMultiDeviceCG add_executable(conjugateGradientMultiDeviceCG conjugateGradientMultiDeviceCG.cu) diff --git a/Samples/4_CUDA_Libraries/jitLto/CMakeLists.txt b/Samples/4_CUDA_Libraries/jitLto/CMakeLists.txt index 42520c7d..23c33291 100644 --- a/Samples/4_CUDA_Libraries/jitLto/CMakeLists.txt +++ b/Samples/4_CUDA_Libraries/jitLto/CMakeLists.txt @@ -19,6 +19,12 @@ endif() # Include directories and libraries include_directories(../../../Common) +# This sample is not supported on QNX +if(CMAKE_SYSTEM_NAME STREQUAL "QNX") + message(STATUS "Will not build sample ${PROJECT_NAME} - not supported on QNX") + return() +endif() + # Source file # Add target for jitLto add_executable(jitLto jitLto.cpp) diff --git a/Samples/4_CUDA_Libraries/watershedSegmentationNPP/CMakeLists.txt b/Samples/4_CUDA_Libraries/watershedSegmentationNPP/CMakeLists.txt index cd38056a..d9116f36 100644 --- a/Samples/4_CUDA_Libraries/watershedSegmentationNPP/CMakeLists.txt +++ b/Samples/4_CUDA_Libraries/watershedSegmentationNPP/CMakeLists.txt @@ -19,6 +19,12 @@ endif() # Include directories and libraries include_directories(../../../Common) +# This sample is not supported on QNX +if(CMAKE_SYSTEM_NAME STREQUAL "QNX") + message(STATUS "Will not build sample ${PROJECT_NAME} - not supported on QNX") + return() +endif() + # Source file # Add target for 
watershedSegmentationNPP add_executable(watershedSegmentationNPP watershedSegmentationNPP.cpp) diff --git a/cmake/toolchains/toolchain-aarch64-qnx.cmake b/cmake/toolchains/toolchain-aarch64-qnx.cmake index 98e6f3d0..55cf5fbe 100644 --- a/cmake/toolchains/toolchain-aarch64-qnx.cmake +++ b/cmake/toolchains/toolchain-aarch64-qnx.cmake @@ -1,80 +1,27 @@ -#============================================================================== -# Toolchain file for cross-compiling to aarch64 QNX -#============================================================================== - -# Cross-compiling, so tell CMake that we are not building for the host system set(CMAKE_SYSTEM_NAME QNX) - -# Target processor architecture set(CMAKE_SYSTEM_PROCESSOR aarch64) -#------------------------------------------------------------------------------ -# QNX host and target come from environment -# Adjust these or hard-code paths as needed: -# -# set(QNX_HOST "/path/to/qnx/host") # e.g. /qnx/qnx710/host/linux/x86_64 -# set(QNX_TARGET "/path/to/qnx/target") # e.g. /qnx/qnx710/target/qnx7 -# -# You can also pass them on the cmake command line: -# cmake -D QNX_HOST=/path/to/qnx/host \ -# -D QNX_TARGET=/path/to/qnx/target \ -# -DCMAKE_TOOLCHAIN_FILE=toolchain-aarch64-qnx.cmake .. 
-#------------------------------------------------------------------------------ +set(QNX_HOST $ENV{QNX_HOST}) +set(QNX_TARGET $ENV{QNX_TARGET}) -#---------------------------------------------------------------------------- -# C/C++ Compilers from QNX -#---------------------------------------------------------------------------- -find_program(QNX_QCC NAMES qcc PATHS "${QNX_HOST}/usr/bin") -find_program(QNX_QPLUS NAMES q++ PATHS "${QNX_HOST}/usr/bin") +message(STATUS "QNX_HOST = ${QNX_HOST}") +message(STATUS "QNX_TARGET = ${QNX_TARGET}") -if(NOT QNX_QCC OR NOT QNX_QPLUS) - message(FATAL_ERROR "Could not find qcc or q++ in QNX_HOST=${QNX_HOST}/usr/bin") -endif() +set(CMAKE_C_COMPILER ${QNX_HOST}/usr/bin/qcc) +set(CMAKE_CXX_COMPILER ${QNX_HOST}/usr/bin/q++) -set(CMAKE_C_COMPILER "${QNX_QCC}") -set(CMAKE_CXX_COMPILER "${QNX_QPLUS}") +set(CMAKE_C_COMPILER_TARGET aarch64) +set(CMAKE_CXX_COMPILER_TARGET aarch64) -#---------------------------------------------------------------------------- -# Sysroot (if you want CMake to know the default sysroot) -#---------------------------------------------------------------------------- -# This is optional, but convenient if the QNX headers/libraries must be found: -#---------------------------------------------------------------------------- -if(DEFINED QNX_TARGET) - set(CMAKE_SYSROOT "${QNX_TARGET}") -endif() +set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER} CACHE STRING "" FORCE) +set(CMAKE_CUDA_COMPILER_ID_TEST_FLAGS_FIRST "-nodlink -L${CUDA_ROOT}/lib64 -L${CUDA_ROOT}/lib -I${CUDA_ROOT}/include") -#---------------------------------------------------------------------------- -# Additional preprocessor definitions & include paths -#---------------------------------------------------------------------------- -add_compile_options( - -D_QNX_SOURCE - -DWIN_INTERFACE_CUSTOM -) +set(CMAKE_C_FLAGS " \"-V${__qnx_gcc_ver},gcc_ntoaarch64le\"") +set(CMAKE_CXX_FLAGS " \"-V${__qnx_gcc_ver},gcc_ntoaarch64le\"") +set(CMAKE_CUDA_FLAGS " 
--qpp-config=${__qnx_gcc_ver},gcc_ntoaarch64le") +set(AUTOMAGIC_NVCC_FLAGS --qpp-config=${__qnx_gcc_ver},gcc_ntoaarch64le CACHE STRING "automagic feature detection flags for cross build") +add_link_options("-V${__qnx_gcc_ver},gcc_ntoaarch64le") -# Add an include path to /usr/include/aarch64-qnx-gnu: -include_directories("/usr/include/aarch64-qnx-gnu") - -#---------------------------------------------------------------------------- -# Linker flags -#---------------------------------------------------------------------------- -set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -L/usr/lib/aarch64-qnx-gnu") - -# Because the Makefile also adds -Wl,-rpath-link,/usr/lib/aarch64-qnx-gnu: -set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-rpath-link,/usr/lib/aarch64-qnx-gnu") - -# If you have a “target filesystem” (TARGET_FS) to link with: -# -L$(TARGET_FS)/usr/lib -# -L$(TARGET_FS)/usr/libnvidia -# etc., you can optionally extend the link flags. For example: -# -# if(DEFINED TARGET_FS) -# set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} \ -# -L${TARGET_FS}/usr/lib -Wl,-rpath-link,${TARGET_FS}/usr/lib \ -# -L${TARGET_FS}/usr/libnvidia -Wl,-rpath-link,${TARGET_FS}/usr/libnvidia") -# include_directories("${TARGET_FS}/../include") -# endif() - -# If you need to link additional libraries, e.g. -lslog2 under certain conditions: -# list(APPEND EXTRA_LIBS "slog2") -# ... 
-#---------------------------------------------------------------------------- +set(CROSS_COMPILE_FOR_QNX ON CACHE BOOL "Cross compiling for QNX platforms") +string(APPEND CMAKE_CXX_FLAGS " -D_QNX_SOURCE") +string(APPEND CMAKE_CUDA_FLAGS " -D_QNX_SOURCE") From 5f6d46dfea4fca3121fa4cdc070c33828246c68e Mon Sep 17 00:00:00 2001 From: shawnz Date: Mon, 9 Jun 2025 19:49:44 +0800 Subject: [PATCH 02/12] Bug 5323018: Update the CMakeLists.txt and Common/helper_multiprocess.cpp of ptxjit and memMapIPCDrv for QNX cross build --- Common/helper_multiprocess.cpp | 8 ++++---- .../3_CUDA_Features/memMapIPCDrv/CMakeLists.txt | 14 +++++++++++++- Samples/3_CUDA_Features/ptxjit/CMakeLists.txt | 8 ++++++++ 3 files changed, 25 insertions(+), 5 deletions(-) diff --git a/Common/helper_multiprocess.cpp b/Common/helper_multiprocess.cpp index 61fe0855..801b205d 100644 --- a/Common/helper_multiprocess.cpp +++ b/Common/helper_multiprocess.cpp @@ -186,7 +186,7 @@ int ipcCreateSocket(ipcHandle *&handle, const char *name, } unlink(name); - bzero(&servaddr, sizeof(servaddr)); + memset(&servaddr, 0, sizeof(servaddr)); servaddr.sun_family = AF_UNIX; size_t len = strlen(name); @@ -220,7 +220,7 @@ int ipcOpenSocket(ipcHandle *&handle) { return -1; } - bzero(&cliaddr, sizeof(cliaddr)); + memset(&cliaddr, 0, sizeof(cliaddr)); cliaddr.sun_family = AF_UNIX; char temp[10]; @@ -326,7 +326,7 @@ int ipcSendDataToServer(ipcHandle *handle, const char *serverName, ssize_t sendResult; struct sockaddr_un serveraddr; - bzero(&serveraddr, sizeof(serveraddr)); + memset(&serveraddr, 0, sizeof(serveraddr)); serveraddr.sun_family = AF_UNIX; strncpy(serveraddr.sun_path, serverName, sizeof(serveraddr.sun_path) - 1); @@ -359,7 +359,7 @@ int ipcSendShareableHandle(ipcHandle *handle, socklen_t len = sizeof(cliaddr); // Construct client address to send this SHareable handle to - bzero(&cliaddr, sizeof(cliaddr)); + memset(&cliaddr, 0, sizeof(cliaddr)); cliaddr.sun_family = AF_UNIX; char temp[10]; sprintf(temp, "%u", process); 
diff --git a/Samples/3_CUDA_Features/memMapIPCDrv/CMakeLists.txt b/Samples/3_CUDA_Features/memMapIPCDrv/CMakeLists.txt index 5e6c54bd..ce04a08e 100644 --- a/Samples/3_CUDA_Features/memMapIPCDrv/CMakeLists.txt +++ b/Samples/3_CUDA_Features/memMapIPCDrv/CMakeLists.txt @@ -37,13 +37,25 @@ target_link_libraries(memMapIPCDrv PUBLIC CUDA::cuda_driver ) -if(UNIX) +if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux") target_link_libraries(memMapIPCDrv PUBLIC rt) endif() +if(${CMAKE_SYSTEM_NAME} STREQUAL "QNX") + target_link_libraries(memMapIPCDrv PUBLIC socket) +endif() + set(CUDA_PTX_FILE "${CMAKE_CURRENT_BINARY_DIR}/memMapIpc_kernel64.ptx") set(CUDA_KERNEL_SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/memMapIpc_kernel.cu") +if(CMAKE_SYSTEM_NAME STREQUAL "QNX") + set(INCLUDES_LIST) + foreach(dir ${CUDAToolkit_INCLUDE_DIRS}) + list(APPEND INCLUDES_LIST "-I${dir}") + endforeach() + string(JOIN " " INCLUDES "${INCLUDES_LIST}") +endif() + add_custom_command( OUTPUT ${CUDA_PTX_FILE} COMMAND ${CMAKE_CUDA_COMPILER} ${INCLUDES} ${ALL_CCFLAGS} -Wno-deprecated-gpu-targets -o ${CUDA_PTX_FILE} -ptx ${CUDA_KERNEL_SOURCE} diff --git a/Samples/3_CUDA_Features/ptxjit/CMakeLists.txt b/Samples/3_CUDA_Features/ptxjit/CMakeLists.txt index ea1de621..ea9e305f 100644 --- a/Samples/3_CUDA_Features/ptxjit/CMakeLists.txt +++ b/Samples/3_CUDA_Features/ptxjit/CMakeLists.txt @@ -41,6 +41,14 @@ target_link_libraries(ptxjit PUBLIC set(CUDA_PTX_FILE "${CMAKE_CURRENT_BINARY_DIR}/ptxjit_kernel64.ptx") set(CUDA_KERNEL_SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/ptxjit_kernel.cu") +if(CMAKE_SYSTEM_NAME STREQUAL "QNX") + set(INCLUDES_LIST) + foreach(dir ${CUDAToolkit_INCLUDE_DIRS}) + list(APPEND INCLUDES_LIST "-I${dir}") + endforeach() + string(JOIN " " INCLUDES "${INCLUDES_LIST}") +endif() + add_custom_command( OUTPUT ${CUDA_PTX_FILE} COMMAND ${CMAKE_CUDA_COMPILER} ${INCLUDES} ${ALL_CCFLAGS} -Wno-deprecated-gpu-targets -o ${CUDA_PTX_FILE} -ptx ${CUDA_KERNEL_SOURCE} From 7f5390cec3ff9a0c472a713881ad8bb237a4be47 Mon Sep 17 00:00:00 2001 
From: shawnz Date: Tue, 10 Jun 2025 15:14:33 +0800 Subject: [PATCH 03/12] Bug 5323124: Waive simpleAWBarrier on QNX --- Samples/0_Introduction/simpleAWBarrier/CMakeLists.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Samples/0_Introduction/simpleAWBarrier/CMakeLists.txt b/Samples/0_Introduction/simpleAWBarrier/CMakeLists.txt index b21a485b..18f001e2 100644 --- a/Samples/0_Introduction/simpleAWBarrier/CMakeLists.txt +++ b/Samples/0_Introduction/simpleAWBarrier/CMakeLists.txt @@ -20,6 +20,12 @@ endif() # Include directories and libraries include_directories(../../../Common) +# This sample is not supported on QNX +if(CMAKE_SYSTEM_NAME STREQUAL "QNX") + message(STATUS "simpleAWBarrier is not supported on QNX") + return() +endif() + # Source file # Add target for simpleAWBarrier add_executable(simpleAWBarrier simpleAWBarrier.cu) From 6c9e9d3cd28b480dae4e4063c3b2dbb8bcfcc8a4 Mon Sep 17 00:00:00 2001 From: shawnz Date: Tue, 10 Jun 2025 15:15:56 +0800 Subject: [PATCH 04/12] Bug 5323163: Get correct cuda include path for finding header files --- Samples/0_Introduction/matrixMul_nvrtc/CMakeLists.txt | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/Samples/0_Introduction/matrixMul_nvrtc/CMakeLists.txt b/Samples/0_Introduction/matrixMul_nvrtc/CMakeLists.txt index 1e7f6618..48c4d32d 100644 --- a/Samples/0_Introduction/matrixMul_nvrtc/CMakeLists.txt +++ b/Samples/0_Introduction/matrixMul_nvrtc/CMakeLists.txt @@ -32,23 +32,26 @@ target_link_libraries(matrixMul_nvrtc PRIVATE CUDA::cuda_driver ) +# The primary directory of CUDAToolkit_INCLUDE_DIRS is the CUDA Toolkit's include directory for finding the header files. 
+list(GET CUDAToolkit_INCLUDE_DIRS 0 CUDA_INCLUDE_DIR) + # Copy clock_kernel.cu to the output directory add_custom_command(TARGET matrixMul_nvrtc POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different - ${CMAKE_CURRENT_SOURCE_DIR}/matrixMul_kernel.cu ${CUDAToolkit_BIN_DIR}/../include/cooperative_groups.h ${CMAKE_CURRENT_BINARY_DIR} + ${CMAKE_CURRENT_SOURCE_DIR}/matrixMul_kernel.cu ${CUDA_INCLUDE_DIR}/cooperative_groups.h ${CMAKE_CURRENT_BINARY_DIR} ) add_custom_command(TARGET matrixMul_nvrtc POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_directory - ${CUDAToolkit_BIN_DIR}/../include/cooperative_groups ${CMAKE_CURRENT_BINARY_DIR}/cooperative_groups + ${CUDA_INCLUDE_DIR}/cooperative_groups ${CMAKE_CURRENT_BINARY_DIR}/cooperative_groups ) add_custom_command(TARGET matrixMul_nvrtc POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_directory - ${CUDAToolkit_BIN_DIR}/../include/cccl/nv ${CMAKE_CURRENT_BINARY_DIR}/nv + ${CUDA_INCLUDE_DIR}/cccl/nv ${CMAKE_CURRENT_BINARY_DIR}/nv ) add_custom_command(TARGET matrixMul_nvrtc POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_directory - ${CUDAToolkit_BIN_DIR}/../include/cccl/cuda ${CMAKE_CURRENT_BINARY_DIR}/cuda + ${CUDA_INCLUDE_DIR}/cccl/cuda ${CMAKE_CURRENT_BINARY_DIR}/cuda ) From de5fa98e6ec5a80cfe39026c89044387c8dc2ba5 Mon Sep 17 00:00:00 2001 From: shawnz Date: Tue, 10 Jun 2025 15:18:04 +0800 Subject: [PATCH 05/12] Bug 5323118: Remove the -lpthread and -lrt which are not supported on QNX --- .../2_Concepts_and_Techniques/threadMigration/CMakeLists.txt | 2 +- Samples/3_CUDA_Features/memMapIPCDrv/CMakeLists.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Samples/2_Concepts_and_Techniques/threadMigration/CMakeLists.txt b/Samples/2_Concepts_and_Techniques/threadMigration/CMakeLists.txt index 9603b6c6..ab3ac87a 100644 --- a/Samples/2_Concepts_and_Techniques/threadMigration/CMakeLists.txt +++ b/Samples/2_Concepts_and_Techniques/threadMigration/CMakeLists.txt @@ -37,7 +37,7 @@ target_link_libraries(threadMigration 
PUBLIC CUDA::cuda_driver ) -if(UNIX) +if(UNIX AND NOT ${CMAKE_SYSTEM_NAME} STREQUAL "QNX") target_link_libraries(threadMigration PUBLIC pthread) endif() diff --git a/Samples/3_CUDA_Features/memMapIPCDrv/CMakeLists.txt b/Samples/3_CUDA_Features/memMapIPCDrv/CMakeLists.txt index ce04a08e..b2066843 100644 --- a/Samples/3_CUDA_Features/memMapIPCDrv/CMakeLists.txt +++ b/Samples/3_CUDA_Features/memMapIPCDrv/CMakeLists.txt @@ -37,7 +37,7 @@ target_link_libraries(memMapIPCDrv PUBLIC CUDA::cuda_driver ) -if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux") +if(UNIX AND NOT ${CMAKE_SYSTEM_NAME} STREQUAL "QNX") target_link_libraries(memMapIPCDrv PUBLIC rt) endif() From 9075c50a3da761b3c54e38af7c60710b6bbc8079 Mon Sep 17 00:00:00 2001 From: shawnz Date: Tue, 10 Jun 2025 15:21:29 +0800 Subject: [PATCH 06/12] Bug 5323034 and 5323144: Disable .rsp for linking as qcc doesn't support lib path with double quotes in .rsp on QNX --- .../conjugateGradientCudaGraphs/CMakeLists.txt | 5 +++++ Samples/4_CUDA_Libraries/simpleCUFFT/CMakeLists.txt | 5 +++++ Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/CMakeLists.txt | 5 +++++ Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/CMakeLists.txt | 5 +++++ 4 files changed, 20 insertions(+) diff --git a/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/CMakeLists.txt b/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/CMakeLists.txt index bcb09bd4..982542e7 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/CMakeLists.txt +++ b/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/CMakeLists.txt @@ -4,6 +4,11 @@ list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../../../cmake/Module project(conjugateGradientCudaGraphs LANGUAGES CUDA CXX) +# Disable response file for libraries on QNX as qcc does not support lib paths with double quotes +if(CMAKE_SYSTEM_NAME STREQUAL "QNX") + set(CMAKE_CUDA_USE_RESPONSE_FILE_FOR_LIBRARIES OFF) +endif() + find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) diff --git 
a/Samples/4_CUDA_Libraries/simpleCUFFT/CMakeLists.txt b/Samples/4_CUDA_Libraries/simpleCUFFT/CMakeLists.txt index dc0ed28e..5373206c 100644 --- a/Samples/4_CUDA_Libraries/simpleCUFFT/CMakeLists.txt +++ b/Samples/4_CUDA_Libraries/simpleCUFFT/CMakeLists.txt @@ -4,6 +4,11 @@ list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../../../cmake/Module project(simpleCUFFT LANGUAGES CUDA) +# Disable response file for libraries on QNX as qcc does not support lib paths with double quotes +if(CMAKE_SYSTEM_NAME STREQUAL "QNX") + set(CMAKE_CUDA_USE_RESPONSE_FILE_FOR_LIBRARIES OFF) +endif() + find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/CMakeLists.txt b/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/CMakeLists.txt index 25deba62..64783b2f 100644 --- a/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/CMakeLists.txt +++ b/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/CMakeLists.txt @@ -4,6 +4,11 @@ list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../../../cmake/Module project(simpleCUFFT_2d_MGPU LANGUAGES CUDA) +# Disable response file for libraries on QNX as qcc does not support lib paths with double quotes +if(CMAKE_SYSTEM_NAME STREQUAL "QNX") + set(CMAKE_CUDA_USE_RESPONSE_FILE_FOR_LIBRARIES OFF) +endif() + find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/CMakeLists.txt b/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/CMakeLists.txt index 98c9b4bf..34aa1146 100644 --- a/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/CMakeLists.txt +++ b/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/CMakeLists.txt @@ -4,6 +4,11 @@ list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../../../cmake/Module project(simpleCUFFT_MGPU LANGUAGES CUDA) +# Disable response file for libraries on QNX as qcc does not support lib paths with double quotes +if(CMAKE_SYSTEM_NAME STREQUAL "QNX") + set(CMAKE_CUDA_USE_RESPONSE_FILE_FOR_LIBRARIES 
OFF) +endif() + find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) From 9424c158484d9f2e5c0b1c87c751b489e6a7283a Mon Sep 17 00:00:00 2001 From: shawnz Date: Tue, 10 Jun 2025 15:33:07 +0800 Subject: [PATCH 07/12] Bug 5331767: Specify sm list of cdp samples for QNX --- .../cdpAdvancedQuicksort/CMakeLists.txt | 14 +++----------- .../cdpBezierTessellation/CMakeLists.txt | 5 +++-- Samples/3_CUDA_Features/cdpQuadtree/CMakeLists.txt | 5 +++-- .../3_CUDA_Features/cdpSimplePrint/CMakeLists.txt | 5 +++-- .../cdpSimpleQuicksort/CMakeLists.txt | 5 +++-- 5 files changed, 15 insertions(+), 19 deletions(-) diff --git a/Samples/3_CUDA_Features/cdpAdvancedQuicksort/CMakeLists.txt b/Samples/3_CUDA_Features/cdpAdvancedQuicksort/CMakeLists.txt index 4d4784b0..c7a147c2 100644 --- a/Samples/3_CUDA_Features/cdpAdvancedQuicksort/CMakeLists.txt +++ b/Samples/3_CUDA_Features/cdpAdvancedQuicksort/CMakeLists.txt @@ -9,22 +9,14 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) # The aarch64/sbsa_aarch64 CUDA toolkit are support on Tegra since 13.0, so need to check which version of the toolkit is installed -string(FIND "${CUDAToolkit_INCLUDE_DIRS}" "aarch64-linux" _aarch64_ctk) -if(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT _aarch64_ctk EQUAL -1) +string(FIND "${CUDAToolkit_INCLUDE_DIRS}" "aarch64-linux" _aarch64_linux_ctk) +string(FIND "${CUDAToolkit_INCLUDE_DIRS}" "aarch64-qnx" _aarch64_qnx_ctk) +if(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND (NOT _aarch64_linux_ctk EQUAL -1 OR NOT _aarch64_qnx_ctk EQUAL -1)) set(CMAKE_CUDA_ARCHITECTURES 87 110) else() set(CMAKE_CUDA_ARCHITECTURES 75 80 86 89 90 100 110 120) endif() -set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") - -if(ENABLE_CUDA_DEBUG) - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -maxrregcount=64") # Limit register usage to 64 for the 
'big_bitonicsort kernel -else() - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -lineinfo") # add line information to all builds for debug tools (exclusive to -G option) -endif() - # Include directories and libraries include_directories(../../../Common) diff --git a/Samples/3_CUDA_Features/cdpBezierTessellation/CMakeLists.txt b/Samples/3_CUDA_Features/cdpBezierTessellation/CMakeLists.txt index c5eb4703..fec2f807 100644 --- a/Samples/3_CUDA_Features/cdpBezierTessellation/CMakeLists.txt +++ b/Samples/3_CUDA_Features/cdpBezierTessellation/CMakeLists.txt @@ -9,8 +9,9 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) # The aarch64/sbsa_aarch64 CUDA toolkit are support on Tegra since 13.0, so need to check which version of the toolkit is installed -string(FIND "${CUDAToolkit_INCLUDE_DIRS}" "aarch64-linux" _aarch64_ctk) -if(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT _aarch64_ctk EQUAL -1) +string(FIND "${CUDAToolkit_INCLUDE_DIRS}" "aarch64-linux" _aarch64_linux_ctk) +string(FIND "${CUDAToolkit_INCLUDE_DIRS}" "aarch64-qnx" _aarch64_qnx_ctk) +if(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND (NOT _aarch64_linux_ctk EQUAL -1 OR NOT _aarch64_qnx_ctk EQUAL -1)) set(CMAKE_CUDA_ARCHITECTURES 87 110) else() set(CMAKE_CUDA_ARCHITECTURES 75 80 86 89 90 100 110 120) diff --git a/Samples/3_CUDA_Features/cdpQuadtree/CMakeLists.txt b/Samples/3_CUDA_Features/cdpQuadtree/CMakeLists.txt index a3fd2eba..85fe0c8c 100644 --- a/Samples/3_CUDA_Features/cdpQuadtree/CMakeLists.txt +++ b/Samples/3_CUDA_Features/cdpQuadtree/CMakeLists.txt @@ -9,8 +9,9 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) # The aarch64/sbsa_aarch64 CUDA toolkit are support on Tegra since 13.0, so need to check which version of the toolkit is installed -string(FIND "${CUDAToolkit_INCLUDE_DIRS}" "aarch64-linux" _aarch64_ctk) -if(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT _aarch64_ctk EQUAL -1) +string(FIND "${CUDAToolkit_INCLUDE_DIRS}" "aarch64-linux" 
_aarch64_linux_ctk) +string(FIND "${CUDAToolkit_INCLUDE_DIRS}" "aarch64-qnx" _aarch64_qnx_ctk) +if(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND (NOT _aarch64_linux_ctk EQUAL -1 OR NOT _aarch64_qnx_ctk EQUAL -1)) set(CMAKE_CUDA_ARCHITECTURES 87 110) else() set(CMAKE_CUDA_ARCHITECTURES 75 80 86 89 90 100 110 120) diff --git a/Samples/3_CUDA_Features/cdpSimplePrint/CMakeLists.txt b/Samples/3_CUDA_Features/cdpSimplePrint/CMakeLists.txt index e58ee19a..726a555d 100644 --- a/Samples/3_CUDA_Features/cdpSimplePrint/CMakeLists.txt +++ b/Samples/3_CUDA_Features/cdpSimplePrint/CMakeLists.txt @@ -9,8 +9,9 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) # The aarch64/sbsa_aarch64 CUDA toolkit are support on Tegra since 13.0, so need to check which version of the toolkit is installed -string(FIND "${CUDAToolkit_INCLUDE_DIRS}" "aarch64-linux" _aarch64_ctk) -if(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT _aarch64_ctk EQUAL -1) +string(FIND "${CUDAToolkit_INCLUDE_DIRS}" "aarch64-linux" _aarch64_linux_ctk) +string(FIND "${CUDAToolkit_INCLUDE_DIRS}" "aarch64-qnx" _aarch64_qnx_ctk) +if(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND (NOT _aarch64_linux_ctk EQUAL -1 OR NOT _aarch64_qnx_ctk EQUAL -1)) set(CMAKE_CUDA_ARCHITECTURES 87 110) else() set(CMAKE_CUDA_ARCHITECTURES 75 80 86 89 90 100 110 120) diff --git a/Samples/3_CUDA_Features/cdpSimpleQuicksort/CMakeLists.txt b/Samples/3_CUDA_Features/cdpSimpleQuicksort/CMakeLists.txt index bbb61864..e6eabe4e 100644 --- a/Samples/3_CUDA_Features/cdpSimpleQuicksort/CMakeLists.txt +++ b/Samples/3_CUDA_Features/cdpSimpleQuicksort/CMakeLists.txt @@ -9,8 +9,9 @@ find_package(CUDAToolkit REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) # The aarch64/sbsa_aarch64 CUDA toolkit are support on Tegra since 13.0, so need to check which version of the toolkit is installed -string(FIND "${CUDAToolkit_INCLUDE_DIRS}" "aarch64-linux" _aarch64_ctk) -if(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT _aarch64_ctk EQUAL -1) 
+string(FIND "${CUDAToolkit_INCLUDE_DIRS}" "aarch64-linux" _aarch64_linux_ctk) +string(FIND "${CUDAToolkit_INCLUDE_DIRS}" "aarch64-qnx" _aarch64_qnx_ctk) +if(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND (NOT _aarch64_linux_ctk EQUAL -1 OR NOT _aarch64_qnx_ctk EQUAL -1)) set(CMAKE_CUDA_ARCHITECTURES 87 110) else() set(CMAKE_CUDA_ARCHITECTURES 75 80 86 89 90 100 110 120) From ce28796d6c3f105495dde3a430d8c34ee6f9ee29 Mon Sep 17 00:00:00 2001 From: shawnz Date: Wed, 11 Jun 2025 16:25:22 +0800 Subject: [PATCH 08/12] Bug 5189457: Disable -no-pie for hpc --- Samples/0_Introduction/matrixMulDynlinkJIT/CMakeLists.txt | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Samples/0_Introduction/matrixMulDynlinkJIT/CMakeLists.txt b/Samples/0_Introduction/matrixMulDynlinkJIT/CMakeLists.txt index 70296afc..3058893e 100644 --- a/Samples/0_Introduction/matrixMulDynlinkJIT/CMakeLists.txt +++ b/Samples/0_Introduction/matrixMulDynlinkJIT/CMakeLists.txt @@ -31,7 +31,12 @@ set_target_properties(matrixMulDynlinkJIT PROPERTIES CUDA_SEPARABLE_COMPILATION ON POSITION_INDEPENDENT_CODE OFF ) -set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -no-pie") + +# Only add -no-pie for GCC or Clang +if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang") + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -no-pie") +endif() + target_link_libraries(matrixMulDynlinkJIT PUBLIC CUDA::cudart CUDA::cuda_driver From a47b422205a1be173e14ffa216e974b3d8c44a6a Mon Sep 17 00:00:00 2001 From: shawnz Date: Wed, 11 Jun 2025 16:32:09 +0800 Subject: [PATCH 09/12] Update CHANGELOG.md and README.md for QNX cross build --- CHANGELOG.md | 1 + README.md | 11 ++++++++--- cmake/toolchains/toolchain-aarch64-qnx.cmake | 14 ++++++++++++-- 3 files changed, 21 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 19cc5391..cb70c6f2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -77,6 +77,7 @@ * `simpleCUFFT_2d_MGPU` * `simpleCUFFT_MGPU` * 
`simpleCUFFT_callback` +* Updated toolchain for cross-compilation for Tegra QNX platforms. ### CUDA 12.9 * Updated toolchain for cross-compilation for Tegra Linux platforms. diff --git a/README.md b/README.md index 6c45cca9..e8deb49e 100644 --- a/README.md +++ b/README.md @@ -135,9 +135,14 @@ $ cmake -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DCMAKE_LIBRARY_PATH=/usr ### QNX -Note that in the current branch sample cross-compilation for QNX is not fully validated. This placeholder will be updated in the -near future with QNX cross-compilation instructions. In the meantime, if you want to cross-compile for QNX please check out one -of the previous tags prior to the CMake build system transition in 12.8. +Cross-compilation of the samples for QNX is supported starting with the v13.0 CUDA Samples. An example build for the Tegra Thor QNX platform might look like this: + +``` +$ mkdir build +$ cd build + +QNX_HOST=/path/to/qnx/host QNX_TARGET=/path/to/qnx/target cmake .. -DBUILD_TEGRA=True -DCMAKE_CUDA_COMPILER=/usr/local/cuda-safe-13.0/bin/nvcc -DCMAKE_TOOLCHAIN_FILE=../cmake/toolchains/toolchain-aarch64-qnx.cmake -DCMAKE_LIBRARY_PATH=/usr/local/cuda-safe-13.0/thor/targets/aarch64-qnx/lib/stubs/ -DCMAKE_INCLUDE_PATH=/usr/local/cuda-safe-13.0/thor/targets/aarch64-qnx/include/ +``` ## Running All Samples as Tests diff --git a/cmake/toolchains/toolchain-aarch64-qnx.cmake b/cmake/toolchains/toolchain-aarch64-qnx.cmake index 55cf5fbe..df9839b8 100644 --- a/cmake/toolchains/toolchain-aarch64-qnx.cmake +++ b/cmake/toolchains/toolchain-aarch64-qnx.cmake @@ -1,18 +1,28 @@ set(CMAKE_SYSTEM_NAME QNX) set(CMAKE_SYSTEM_PROCESSOR aarch64) +# Need to set the QNX_HOST and QNX_TARGET environment variables set(QNX_HOST $ENV{QNX_HOST}) set(QNX_TARGET $ENV{QNX_TARGET}) message(STATUS "QNX_HOST = ${QNX_HOST}") message(STATUS "QNX_TARGET = ${QNX_TARGET}") -set(CMAKE_C_COMPILER ${QNX_HOST}/usr/bin/qcc) -set(CMAKE_CXX_COMPILER ${QNX_HOST}/usr/bin/q++) +find_program(QNX_QCC NAMES qcc PATHS "${QNX_HOST}/usr/bin")
+find_program(QNX_QPLUS NAMES q++ PATHS "${QNX_HOST}/usr/bin") + +if(NOT QNX_QCC OR NOT QNX_QPLUS) + message(FATAL_ERROR "Could not find qcc or q++ in QNX_HOST=${QNX_HOST}/usr/bin") +endif() + +# Specify the cross-compilers +set(CMAKE_C_COMPILER ${QNX_QCC}) +set(CMAKE_CXX_COMPILER ${QNX_QPLUS}) set(CMAKE_C_COMPILER_TARGET aarch64) set(CMAKE_CXX_COMPILER_TARGET aarch64) +# Set compiler flags set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER} CACHE STRING "" FORCE) set(CMAKE_CUDA_COMPILER_ID_TEST_FLAGS_FIRST "-nodlink -L${CUDA_ROOT}/lib64 -L${CUDA_ROOT}/lib -I${CUDA_ROOT}/include") From e674cc36fe9e0c78c3a7de75698cf42709b3d249 Mon Sep 17 00:00:00 2001 From: shawnz Date: Fri, 13 Jun 2025 16:03:05 +0800 Subject: [PATCH 10/12] Bug 5339530: Set socket creating folder to /tmp for QNX --- Common/helper_multiprocess.cpp | 8 ++++---- Common/helper_multiprocess.h | 7 +++++++ Samples/0_Introduction/matrixMulDrv/CMakeLists.txt | 2 +- Samples/0_Introduction/simpleDrvRuntime/CMakeLists.txt | 2 +- Samples/0_Introduction/simpleTextureDrv/CMakeLists.txt | 2 +- Samples/0_Introduction/vectorAddDrv/CMakeLists.txt | 2 +- .../threadMigration/CMakeLists.txt | 2 +- .../3_CUDA_Features/cdpAdvancedQuicksort/CMakeLists.txt | 9 +++++++++ Samples/3_CUDA_Features/memMapIPCDrv/CMakeLists.txt | 2 +- Samples/3_CUDA_Features/ptxjit/CMakeLists.txt | 2 +- 10 files changed, 27 insertions(+), 11 deletions(-) diff --git a/Common/helper_multiprocess.cpp b/Common/helper_multiprocess.cpp index 801b205d..1e2d9255 100644 --- a/Common/helper_multiprocess.cpp +++ b/Common/helper_multiprocess.cpp @@ -222,10 +222,10 @@ int ipcOpenSocket(ipcHandle *&handle) { memset(&cliaddr, 0, sizeof(cliaddr)); cliaddr.sun_family = AF_UNIX; - char temp[10]; + char temp[20]; // Create unique name for the socket. 
- sprintf(temp, "%u", getpid()); + sprintf(temp, "%s%u", SOCK_FOLDER, getpid()); strcpy(cliaddr.sun_path, temp); if (bind(sock, (struct sockaddr *)&cliaddr, sizeof(cliaddr)) < 0) { @@ -361,8 +361,8 @@ int ipcSendShareableHandle(ipcHandle *handle, // Construct client address to send this SHareable handle to memset(&cliaddr, 0, sizeof(cliaddr)); cliaddr.sun_family = AF_UNIX; - char temp[10]; - sprintf(temp, "%u", process); + char temp[20]; + sprintf(temp, "%s%u", SOCK_FOLDER, process); strcpy(cliaddr.sun_path, temp); len = sizeof(cliaddr); diff --git a/Common/helper_multiprocess.h b/Common/helper_multiprocess.h index 5c760718..d0bdcc20 100644 --- a/Common/helper_multiprocess.h +++ b/Common/helper_multiprocess.h @@ -54,6 +54,13 @@ #endif #include +// Define "/tmp" as socket creating folder for QNX +#if defined(__QNX__) +#define SOCK_FOLDER "/tmp/" +#else +#define SOCK_FOLDER "" +#endif + typedef struct sharedMemoryInfo_st { void *addr; size_t size; diff --git a/Samples/0_Introduction/matrixMulDrv/CMakeLists.txt b/Samples/0_Introduction/matrixMulDrv/CMakeLists.txt index 5cdb581c..c777fbd6 100644 --- a/Samples/0_Introduction/matrixMulDrv/CMakeLists.txt +++ b/Samples/0_Introduction/matrixMulDrv/CMakeLists.txt @@ -49,7 +49,7 @@ endforeach() if(CMAKE_SYSTEM_NAME STREQUAL "QNX") set(INCLUDES_LIST) foreach(dir ${CUDAToolkit_INCLUDE_DIRS}) - list(APPEND INCLUDES_LIST "-I${dir}") + list(APPEND INCLUDES_LIST "-I${dir}") endforeach() string(JOIN " " INCLUDES "${INCLUDES_LIST}") endif() diff --git a/Samples/0_Introduction/simpleDrvRuntime/CMakeLists.txt b/Samples/0_Introduction/simpleDrvRuntime/CMakeLists.txt index 04684da2..0774b4d5 100644 --- a/Samples/0_Introduction/simpleDrvRuntime/CMakeLists.txt +++ b/Samples/0_Introduction/simpleDrvRuntime/CMakeLists.txt @@ -51,7 +51,7 @@ endforeach() if(CMAKE_SYSTEM_NAME STREQUAL "QNX") set(INCLUDES_LIST) foreach(dir ${CUDAToolkit_INCLUDE_DIRS}) - list(APPEND INCLUDES_LIST "-I${dir}") + list(APPEND INCLUDES_LIST "-I${dir}") endforeach() 
string(JOIN " " INCLUDES "${INCLUDES_LIST}") endif() diff --git a/Samples/0_Introduction/simpleTextureDrv/CMakeLists.txt b/Samples/0_Introduction/simpleTextureDrv/CMakeLists.txt index befbfd05..19f7f079 100644 --- a/Samples/0_Introduction/simpleTextureDrv/CMakeLists.txt +++ b/Samples/0_Introduction/simpleTextureDrv/CMakeLists.txt @@ -48,7 +48,7 @@ endforeach() if(CMAKE_SYSTEM_NAME STREQUAL "QNX") set(INCLUDES_LIST) foreach(dir ${CUDAToolkit_INCLUDE_DIRS}) - list(APPEND INCLUDES_LIST "-I${dir}") + list(APPEND INCLUDES_LIST "-I${dir}") endforeach() string(JOIN " " INCLUDES "${INCLUDES_LIST}") endif() diff --git a/Samples/0_Introduction/vectorAddDrv/CMakeLists.txt b/Samples/0_Introduction/vectorAddDrv/CMakeLists.txt index abbef92e..0b6a3477 100644 --- a/Samples/0_Introduction/vectorAddDrv/CMakeLists.txt +++ b/Samples/0_Introduction/vectorAddDrv/CMakeLists.txt @@ -48,7 +48,7 @@ endforeach() if(CMAKE_SYSTEM_NAME STREQUAL "QNX") set(INCLUDES_LIST) foreach(dir ${CUDAToolkit_INCLUDE_DIRS}) - list(APPEND INCLUDES_LIST "-I${dir}") + list(APPEND INCLUDES_LIST "-I${dir}") endforeach() string(JOIN " " INCLUDES "${INCLUDES_LIST}") endif() diff --git a/Samples/2_Concepts_and_Techniques/threadMigration/CMakeLists.txt b/Samples/2_Concepts_and_Techniques/threadMigration/CMakeLists.txt index ab3ac87a..cd04098a 100644 --- a/Samples/2_Concepts_and_Techniques/threadMigration/CMakeLists.txt +++ b/Samples/2_Concepts_and_Techniques/threadMigration/CMakeLists.txt @@ -53,7 +53,7 @@ endforeach() if(CMAKE_SYSTEM_NAME STREQUAL "QNX") set(INCLUDES_LIST) foreach(dir ${CUDAToolkit_INCLUDE_DIRS}) - list(APPEND INCLUDES_LIST "-I${dir}") + list(APPEND INCLUDES_LIST "-I${dir}") endforeach() string(JOIN " " INCLUDES "${INCLUDES_LIST}") endif() diff --git a/Samples/3_CUDA_Features/cdpAdvancedQuicksort/CMakeLists.txt b/Samples/3_CUDA_Features/cdpAdvancedQuicksort/CMakeLists.txt index c7a147c2..edbd7698 100644 --- a/Samples/3_CUDA_Features/cdpAdvancedQuicksort/CMakeLists.txt +++ 
b/Samples/3_CUDA_Features/cdpAdvancedQuicksort/CMakeLists.txt @@ -17,6 +17,15 @@ else() set(CMAKE_CUDA_ARCHITECTURES 75 80 86 89 90 100 110 120) endif() +set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") + +if(ENABLE_CUDA_DEBUG) + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets) + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -maxrregcount=64") # Limit register usage to 64 for the 'big_bitonicsort kernel +else() + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -lineinfo") # add line information to all builds for debug tools (exclusive to -G option) +endif() + # Include directories and libraries include_directories(../../../Common) diff --git a/Samples/3_CUDA_Features/memMapIPCDrv/CMakeLists.txt b/Samples/3_CUDA_Features/memMapIPCDrv/CMakeLists.txt index b2066843..b57e9187 100644 --- a/Samples/3_CUDA_Features/memMapIPCDrv/CMakeLists.txt +++ b/Samples/3_CUDA_Features/memMapIPCDrv/CMakeLists.txt @@ -51,7 +51,7 @@ set(CUDA_KERNEL_SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/memMapIpc_kernel.cu") if(CMAKE_SYSTEM_NAME STREQUAL "QNX") set(INCLUDES_LIST) foreach(dir ${CUDAToolkit_INCLUDE_DIRS}) - list(APPEND INCLUDES_LIST "-I${dir}") + list(APPEND INCLUDES_LIST "-I${dir}") endforeach() string(JOIN " " INCLUDES "${INCLUDES_LIST}") endif() diff --git a/Samples/3_CUDA_Features/ptxjit/CMakeLists.txt b/Samples/3_CUDA_Features/ptxjit/CMakeLists.txt index ea9e305f..575949b0 100644 --- a/Samples/3_CUDA_Features/ptxjit/CMakeLists.txt +++ b/Samples/3_CUDA_Features/ptxjit/CMakeLists.txt @@ -44,7 +44,7 @@ set(CUDA_KERNEL_SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/ptxjit_kernel.cu") if(CMAKE_SYSTEM_NAME STREQUAL "QNX") set(INCLUDES_LIST) foreach(dir ${CUDAToolkit_INCLUDE_DIRS}) - list(APPEND INCLUDES_LIST "-I${dir}") + list(APPEND INCLUDES_LIST "-I${dir}") endforeach() string(JOIN " " INCLUDES "${INCLUDES_LIST}") endif() From 225f84d4336da50dbafd1205b291ea3c2384570e Mon Sep 17 00:00:00 2001 From: shawnz Date: Wed, 18 
Jun 2025 16:17:46 +0800 Subject: [PATCH 11/12] Bug 5295515: Update README.md for forward compatibility sample build --- README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/README.md b/README.md index e8deb49e..6c681322 100644 --- a/README.md +++ b/README.md @@ -144,6 +144,14 @@ $ cd build QNX_HOST=/path/to/qnx/host QNX_TARGET=/path/to/qnx/target cmake .. -DBUILD_TEGRA=True -DCMAKE_CUDA_COMPILER=/usr/local/cuda-safe-13.0/bin/nvcc -DCMAKE_TOOLCHAIN_FILE=../cmake/toolchains/toolchain-aarch64-qnx.cmake -DCMAKE_LIBRARY_PATH=/usr/local/cuda-safe-13.0/thor/targets/aarch64-qnx/lib/stubs/ -DCMAKE_INCLUDE_PATH=/usr/local/cuda-safe-13.0/thor/targets/aarch64-qnx/include/ ``` +### Forward Compatibility + +To build samples with new CUDA Toolkit(CUDA 13.0 or later) and UMD(version 580 or later) and old KMD(version 535 or earlier),you need to set the `CMAKE_PREFIX_PATH` for using new driver library, the command might like this: + +``` +cmake -DCMAKE_PREFIX_PATH=/usr/local/cuda/lib64/stubs/ .. +``` + ## Running All Samples as Tests It's important to note that the CUDA samples are _not_ intended as a validation suite for CUDA. They do not cover corner cases, they do not completely cover the From 0d6103184673fcf257fb5bd1f604ce42d1d1e51d Mon Sep 17 00:00:00 2001 From: shawnz Date: Wed, 18 Jun 2025 16:24:55 +0800 Subject: [PATCH 12/12] Bug 5355361: Update README.md of 7_libNVVM for forward compatibility sample build --- README.md | 2 +- Samples/7_libNVVM/README.md | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 6c681322..9af7258f 100644 --- a/README.md +++ b/README.md @@ -146,7 +146,7 @@ QNX_HOST=/path/to/qnx/host QNX_TARGET=/path/to/qnx/target cmake .. 
-DBUILD_TEGRA ### Forward Compatibility -To build samples with new CUDA Toolkit(CUDA 13.0 or later) and UMD(version 580 or later) and old KMD(version 535 or earlier),you need to set the `CMAKE_PREFIX_PATH` for using new driver library, the command might like this: +To build samples with a new CUDA Toolkit (CUDA 13.0 or later) and UMD (Version 580 or later) and an old KMD (Version 550 or earlier), you need to set `CMAKE_PREFIX_PATH` to use the new driver library; the command might look like this: ``` cmake -DCMAKE_PREFIX_PATH=/usr/local/cuda/lib64/stubs/ .. diff --git a/Samples/7_libNVVM/README.md b/Samples/7_libNVVM/README.md index 9aa39ed0..efb957ec 100644 --- a/Samples/7_libNVVM/README.md +++ b/Samples/7_libNVVM/README.md @@ -58,6 +58,12 @@ Alternatively, we provide a Makefile that will automatically build these samples on Linux as part of the toplevel cuda-samples build. Windows users should build manually via utils/built.bat or Visual Studio's CMake integration. +To build and run samples with a new CUDA Toolkit (CUDA 13.0 or later) and UMD +(Version 580 or later) and an old KMD (Version 550 or earlier), users need to +update the CMake invocation in utils/build.sh or build.bat by adding +`CMAKE_PREFIX_PATH` with the stubs path, like this: +"-DCMAKE_PREFIX_PATH=/usr/local/cuda/lib64/stubs/" + A Note About the cuda-c-linking Sample --------------------------------------