Merge 13.0 changes into 13.1 dev branch

This commit is contained in:
Rob Armstrong 2025-07-29 14:14:27 -07:00
commit 775d1b4dd3
9 changed files with 73 additions and 19 deletions

View File

@ -133,6 +133,58 @@ $ cd build
$ cmake -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DCMAKE_LIBRARY_PATH=/usr/local/cuda/orin/lib64/ -DCMAKE_INCLUDE_PATH=/usr/local/cuda/orin/include -DBUILD_TEGRA=True ..
```
### Cross Building for Automotive Linux Platforms from the DriveOS Docker containers
To build the CUDA samples for the target platform from within the DriveOS Docker containers, use the following instructions.
Mount the target Root Filesystem (RFS) in the container so that the CUDA cmake process has the correct paths to CUDA and other system libraries required to build the samples.
Create a temporary directory under `/drive` to use as the mount point. `<temp>` is any directory name of your choosing; the following steps use `temp`, giving `/drive/temp`:
```
$ mkdir /drive/<temp>
```
Mount the filesystem by running the following command:
```
$ mount /drive/drive-linux/filesystem/targetfs-images/dev_nsr_desktop_ubuntu-24.04_thor_rfs.img /drive/temp
```
Configure the project by running the following cmake command:
```
$ mkdir build && cd build
$ cmake .. -DBUILD_TEGRA=True \
-DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc \
-DCMAKE_TOOLCHAIN_FILE=../cmake/toolchains/toolchain-aarch64-linux.cmake \
-DTARGET_FS=/drive/temp \
-DCMAKE_LIBRARY_PATH=/drive/temp/usr/local/cuda-13.0/thor/lib64/ \
-DCMAKE_INCLUDE_PATH=/drive/temp/usr/local/cuda-13.0/thor/include/
```
Please note that the following libraries are not pre-installed in the DriveOS dev-nsr target filesystem:
* libdrm-dev
* Vulkan
This causes the cmake command to throw errors related to the missing files, and as a result, the related samples will not build in later steps. This issue will be addressed in a future DriveOS release.
To build the samples while ignoring the errors mentioned above, you can use `--ignore-errors`/`--keep-going`, or comment out the corresponding `add_subdirectory` commands in the CMakeLists.txt in the parent folder for the samples requiring Vulkan and libdrm-dev:
```
$ make -j$(nproc) --ignore-errors # or --keep-going
```
```
# In Samples/5_Domain_Specific/CMakeLists.txt
# add_subdirectory(simpleGL)
# add_subdirectory(simpleVulkan)
# add_subdirectory(simpleVulkanMMAP)
# In Samples/8_Platform_Specific/Tegra/CMakeLists.txt
# add_subdirectory(simpleGLES_EGLOutput)
```
### QNX
Cross-compilation for QNX with CMake is supported in the CUDA 13.0 samples release and newer. An example build for

View File

@ -224,7 +224,7 @@ __global__ void compute_gemm(const half *A, const half *B, const float *C, float
// there's no such tile, all warps in this CTA exit.
for (unsigned int block_pos = blockIdx.x;; block_pos += gridDim.x) {
const unsigned int block_tile_i = ((block_pos * BLOCK_ROW_TILES) / N_TILES) * (BLOCK_COL_TILES);
const unsigned int block_tile_j = (block_pos * BLOCK_COL_TILES) % N_TILES;
const unsigned int block_tile_j = (block_pos * BLOCK_ROW_TILES) % N_TILES;
// Stop when there are no more D matrix tiles to compute in this CTA.
if (block_tile_i >= M_TILES) {

View File

@ -4,7 +4,9 @@ libNVVM and NVVM IR Samples
Introduction
------------
The following samples illustrate the use of libNVVM and NVVM IR.
The following samples illustrate the use of libNVVM and NVVM IR. Running and
testing these samples requires an NVIDIA driver compatible with the CUDA
Toolkit being used for compilation.
- cuda-shared-memory - A directory containing NVVM IR programs that demonstrate
CUDA 'shared' memory usage.
@ -71,7 +73,7 @@ A Note About the cuda-c-linking Sample
This sample requires a development package (or locally-built) LLVM library
between versions 7 and 14 inclusive. LLVM 15 defaults to using opaque pointers,
which are currently not supported in libNVVM.
which are not supported in libNVVM for pre-Blackwell architectures.
The LLVM_HOME environment variable is required for users who wish to build the
cuda-c-linking sample and have a locally built copy of LLVM that they wish to

View File

@ -1,4 +1,4 @@
# Copyright (c) 1993-2023, NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 1993-2025, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
@ -29,7 +29,7 @@ if (LLVM_PACKAGE_VERSION VERSION_GREATER_EQUAL "15" OR
LLVM_PACKAGE_VERSION VERSION_LESS "7")
message(STATUS "The cuda-c-linking sample is expected to build with "
"LLVM development libraries v7 to v14, opaque pointers are "
"not yet supported in libNVVM.")
"not supported in libNVVM for pre-Blackwell architectures.")
return()
endif ()

View File

@ -1,4 +1,4 @@
// Copyright (c) 1993-2023, NVIDIA CORPORATION. All rights reserved.
// Copyright (c) 1993-2025, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
@ -216,8 +216,8 @@ int main(int argc, char **argv)
checkCudaErrors(cuDeviceGetAttribute(&devMajor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, device));
checkCudaErrors(cuDeviceGetAttribute(&devMinor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, device));
outs() << "Device Compute Capability: " << devMajor << "." << devMinor << "\n";
if (devMajor < 5) {
errs() << "ERROR: Device 0 is not sm_50 or later.\n";
if (devMajor < 7 && devMinor < 5) {
errs() << "ERROR: Device 0 is not sm_75 or later.\n";
return 1;
}

View File

@ -1,4 +1,4 @@
// Copyright (c) 2014-2023, NVIDIA CORPORATION. All rights reserved.
// Copyright (c) 2014-2025, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
@ -181,8 +181,8 @@ static CUdevice cudaDeviceInit(int *major, int *minor)
checkCudaErrors(cuDeviceGetAttribute(major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevice));
checkCudaErrors(cuDeviceGetAttribute(minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevice));
printf("compute capability = %d.%d\n", *major, *minor);
if (*major < 5) {
fprintf(stderr, "Device 0 is not sm_50 or later\n");
if (*major < 7 && *minor < 5) {
fprintf(stderr, "Device 0 is not sm_75 or later\n");
exit(EXIT_FAILURE);
}
return cuDevice;

View File

@ -22,6 +22,6 @@ interleaved.
For example,
$ ptxgen a.ll -arch=compute_50 b.bc
$ ptxgen a.ll -arch=compute_75 b.bc
links a.ll and b.bc, and generates PTX code for the compute_50 architecture.
links a.ll and b.bc, and generates PTX code for the compute_75 architecture.

View File

@ -1,4 +1,4 @@
// Copyright (c) 1993-2023, NVIDIA CORPORATION. All rights reserved.
// Copyright (c) 1993-2025, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
@ -74,8 +74,8 @@ static CUdevice cudaDeviceInit(int *devMajor, int *devMinor)
// Obtain the device's compute capability.
checkCudaErrors(cuDeviceGetAttribute(devMajor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevice));
if (*devMajor < 5) {
fprintf(stderr, "Device 0 is not sm_50 or later\n");
if (*devMajor < 7 && *devMinor < 5) {
fprintf(stderr, "Device 0 is not sm_75 or later\n");
exit(EXIT_FAILURE);
}
checkCudaErrors(cuDeviceGetAttribute(devMinor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevice));

View File

@ -1,4 +1,4 @@
// Copyright (c) 2014-2023, NVIDIA CORPORATION. All rights reserved.
// Copyright (c) 2014-2025, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
@ -172,8 +172,8 @@ static CUdevice cudaDeviceInit(int *major, int *minor)
checkCudaErrors(cuDeviceGetAttribute(major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevice));
checkCudaErrors(cuDeviceGetAttribute(minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevice));
printf("compute capability = %d.%d\n", *major, *minor);
if (*major < 5) {
fprintf(stderr, "Device 0 is not sm_50 or later\n");
if (*major < 7 && *minor < 5) {
fprintf(stderr, "Device 0 is not sm_75 or later\n");
exit(EXIT_FAILURE);
}