diff --git a/README.md b/README.md index 71aaba56..f2b67574 100644 --- a/README.md +++ b/README.md @@ -133,6 +133,58 @@ $ cd build $ cmake -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DCMAKE_LIBRARY_PATH=/usr/local/cuda/orin/lib64/ -DCMAKE_INCLUDE_PATH=/usr/local/cuda/orin/include -DBUILD_TEGRA=True .. ``` +### Cross Building for Automotive Linux Platforms from the DriveOS Docker containers + +To build CUDA samples for the target platform from the DriveOS Docker containers, use the following instructions. + +Mount the target Root Filesystem (RFS) in the container so that the CUDA cmake process has the correct paths to CUDA and other system libraries required to build the samples. + +Create a temporary directory. `<temp>` is any directory name of your choosing; for example, you can use `/drive/temp`: + +``` +$ mkdir /drive/<temp> +``` + +Mount the filesystem by running the following command: + +``` +$ mount /drive/drive-linux/filesystem/targetfs-images/dev_nsr_desktop_ubuntu-24.04_thor_rfs.img /drive/temp +``` + +Configure the project by running the following cmake command: + +``` +$ mkdir build && cd build +$ cmake .. -DBUILD_TEGRA=True \ + -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc \ + -DCMAKE_TOOLCHAIN_FILE=../cmake/toolchains/toolchain-aarch64-linux.cmake \ + -DTARGET_FS=/drive/temp \ + -DCMAKE_LIBRARY_PATH=/drive/temp/usr/local/cuda-13.0/thor/lib64/ \ + -DCMAKE_INCLUDE_PATH=/drive/temp/usr/local/cuda-13.0/thor/include/ +``` + +Please note that the following libraries are not pre-installed in the DriveOS dev-nsr target filesystem: +* libdrm-dev +* Vulkan + +This causes the cmake command to throw errors related to the missing files, and as a result, the related samples will not build in later steps. This issue will be addressed in a future DriveOS release. 
+ +To build the samples while ignoring the errors mentioned above, you can use `--ignore-errors`/`--keep-going` or comment out the corresponding `add_subdirectory` commands in the CMakeLists.txt in the parent folder for the samples requiring Vulkan and libdrm-dev: + +``` +$ make -j$(nproc) --ignore-errors # or --keep-going +``` + +``` +# In Samples/5_Domain_Specific/CMakeLists.txt +# add_subdirectory(simpleGL) +# add_subdirectory(simpleVulkan) +# add_subdirectory(simpleVulkanMMAP) + +# In Samples/8_Platform_Specific/Tegra/CMakeLists.txt +# add_subdirectory(simpleGLES_EGLOutput) +``` + ### QNX Cross-compilation for QNX with CMake is supported in the CUDA 13.0 samples release and newer. An example build for diff --git a/Samples/3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm.cu b/Samples/3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm.cu index d6431f01..34dc96c9 100644 --- a/Samples/3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm.cu +++ b/Samples/3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm.cu @@ -224,7 +224,7 @@ __global__ void compute_gemm(const half *A, const half *B, const float *C, float // there's no such tile, all warps in this CTA exit. for (unsigned int block_pos = blockIdx.x;; block_pos += gridDim.x) { const unsigned int block_tile_i = ((block_pos * BLOCK_ROW_TILES) / N_TILES) * (BLOCK_COL_TILES); - const unsigned int block_tile_j = (block_pos * BLOCK_COL_TILES) % N_TILES; + const unsigned int block_tile_j = (block_pos * BLOCK_ROW_TILES) % N_TILES; // Stop when there are no more D matrix tiles to compute in this CTA. if (block_tile_i >= M_TILES) { diff --git a/Samples/7_libNVVM/README.md b/Samples/7_libNVVM/README.md index 09fbe98c..cdabae0b 100644 --- a/Samples/7_libNVVM/README.md +++ b/Samples/7_libNVVM/README.md @@ -4,7 +4,9 @@ libNVVM and NVVM IR Samples Introduction ------------ -The following samples illustrate the use of libNVVM and NVVM IR. +The following samples illustrate the use of libNVVM and NVVM IR. 
Running and +testing these samples requires an NVIDIA driver compatible with the CUDA +Toolkit being used for compilation. - cuda-shared-memory - A directory containing NVVM IR programs that demonstrate CUDA 'shared' memory usage. @@ -71,7 +73,7 @@ A Note About the cuda-c-linking Sample This sample requires a development package (or locally-built) LLVM library between versions 7 to 14 inclusive. LLVM 15 defaults to using opaque pointers, -which are currently not supported in libNVVM. +which are not supported in libNVVM for pre-Blackwell architectures. The LLVM_HOME environment variable is required for users who wish to build the cuda-c-linking sample and have a locally built copy of LLVM that they wish to diff --git a/Samples/7_libNVVM/cuda-c-linking/CMakeLists.txt b/Samples/7_libNVVM/cuda-c-linking/CMakeLists.txt index 072b6aa5..0a6dea2e 100644 --- a/Samples/7_libNVVM/cuda-c-linking/CMakeLists.txt +++ b/Samples/7_libNVVM/cuda-c-linking/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (c) 1993-2023, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 1993-2025, NVIDIA CORPORATION. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions @@ -29,7 +29,7 @@ if (LLVM_PACKAGE_VERSION VERSION_GREATER_EQUAL "15" OR LLVM_PACKAGE_VERSION VERSION_LESS "7") message(STATUS "The cuda-c-linking sample is expected to build with " "LLVM development libraries v7 to v14, opaque pointers are " - "not yet supported in libNVVM.") + "not supported in libNVVM for pre-Blackwell architectures.") return() endif () diff --git a/Samples/7_libNVVM/cuda-c-linking/cuda-c-linking.cpp b/Samples/7_libNVVM/cuda-c-linking/cuda-c-linking.cpp index 3717c413..de7d1be1 100644 --- a/Samples/7_libNVVM/cuda-c-linking/cuda-c-linking.cpp +++ b/Samples/7_libNVVM/cuda-c-linking/cuda-c-linking.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 1993-2023, NVIDIA CORPORATION. All rights reserved. 
+// Copyright (c) 1993-2025, NVIDIA CORPORATION. All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions @@ -216,8 +216,8 @@ int main(int argc, char **argv) checkCudaErrors(cuDeviceGetAttribute(&devMajor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, device)); checkCudaErrors(cuDeviceGetAttribute(&devMinor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, device)); outs() << "Device Compute Capability: " << devMajor << "." << devMinor << "\n"; - if (devMajor < 5) { - errs() << "ERROR: Device 0 is not sm_50 or later.\n"; + if (devMajor < 7 || (devMajor == 7 && devMinor < 5)) { /* below sm_75: major < 7, or 7.x with minor < 5 */ + errs() << "ERROR: Device 0 is not sm_75 or later.\n"; return 1; } diff --git a/Samples/7_libNVVM/device-side-launch/dsl.c b/Samples/7_libNVVM/device-side-launch/dsl.c index 17cfe835..3dbce390 100644 --- a/Samples/7_libNVVM/device-side-launch/dsl.c +++ b/Samples/7_libNVVM/device-side-launch/dsl.c @@ -1,4 +1,4 @@ -// Copyright (c) 2014-2023, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2014-2025, NVIDIA CORPORATION. All rights reserved. 
// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions @@ -181,8 +181,8 @@ static CUdevice cudaDeviceInit(int *major, int *minor) checkCudaErrors(cuDeviceGetAttribute(major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevice)); checkCudaErrors(cuDeviceGetAttribute(minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevice)); printf("compute capability = %d.%d\n", *major, *minor); - if (*major < 5) { - fprintf(stderr, "Device 0 is not sm_50 or later\n"); + if (*major < 7 || (*major == 7 && *minor < 5)) { /* below sm_75: major < 7, or 7.x with minor < 5 */ + fprintf(stderr, "Device 0 is not sm_75 or later\n"); exit(EXIT_FAILURE); } return cuDevice; diff --git a/Samples/7_libNVVM/ptxgen/README.md b/Samples/7_libNVVM/ptxgen/README.md index 8d838d29..d4ee4257 100644 --- a/Samples/7_libNVVM/ptxgen/README.md +++ b/Samples/7_libNVVM/ptxgen/README.md @@ -22,6 +22,6 @@ interleaved. For example, - $ ptxgen a.ll -arch=compute_50 b.bc + $ ptxgen a.ll -arch=compute_75 b.bc -links a.ll and b.bc, and generates PTX code for the compute_50 architecture. +links a.ll and b.bc, and generates PTX code for the compute_75 architecture. diff --git a/Samples/7_libNVVM/simple/simple.c b/Samples/7_libNVVM/simple/simple.c index 58c31d5b..12b8dcc2 100644 --- a/Samples/7_libNVVM/simple/simple.c +++ b/Samples/7_libNVVM/simple/simple.c @@ -1,4 +1,4 @@ -// Copyright (c) 1993-2023, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 1993-2025, NVIDIA CORPORATION. All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions @@ -74,8 +74,8 @@ static CUdevice cudaDeviceInit(int *devMajor, int *devMinor) // Obtain the device's compute capability. 
checkCudaErrors(cuDeviceGetAttribute(devMajor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevice)); - if (*devMajor < 5) { - fprintf(stderr, "Device 0 is not sm_50 or later\n"); - exit(EXIT_FAILURE); - } checkCudaErrors(cuDeviceGetAttribute(devMinor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevice)); + if (*devMajor < 7 || (*devMajor == 7 && *devMinor < 5)) { /* reads *devMinor, so it runs after the minor query; rejects anything below sm_75 */ + fprintf(stderr, "Device 0 is not sm_75 or later\n"); + exit(EXIT_FAILURE); + } diff --git a/Samples/7_libNVVM/uvmlite/uvmlite.c b/Samples/7_libNVVM/uvmlite/uvmlite.c index f977e092..cfe61cd9 100644 --- a/Samples/7_libNVVM/uvmlite/uvmlite.c +++ b/Samples/7_libNVVM/uvmlite/uvmlite.c @@ -1,4 +1,4 @@ -// Copyright (c) 2014-2023, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2014-2025, NVIDIA CORPORATION. All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions @@ -172,8 +172,8 @@ static CUdevice cudaDeviceInit(int *major, int *minor) checkCudaErrors(cuDeviceGetAttribute(major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevice)); checkCudaErrors(cuDeviceGetAttribute(minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevice)); printf("compute capability = %d.%d\n", *major, *minor); - if (*major < 5) { - fprintf(stderr, "Device 0 is not sm_50 or later\n"); + if (*major < 7 || (*major == 7 && *minor < 5)) { /* below sm_75: major < 7, or 7.x with minor < 5 */ + fprintf(stderr, "Device 0 is not sm_75 or later\n"); exit(EXIT_FAILURE); }