mirror of
https://github.com/NVIDIA/cuda-samples.git
synced 2026-01-09 11:22:51 +08:00
Merge 13.0 changes into 13.1 dev branch
This commit is contained in:
commit
775d1b4dd3
52
README.md
52
README.md
@ -133,6 +133,58 @@ $ cd build
|
|||||||
$ cmake -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DCMAKE_LIBRARY_PATH=/usr/local/cuda/orin/lib64/ -DCMAKE_INCLUDE_PATH=/usr/local/cuda/orin/include -DBUILD_TEGRA=True ..
|
$ cmake -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DCMAKE_LIBRARY_PATH=/usr/local/cuda/orin/lib64/ -DCMAKE_INCLUDE_PATH=/usr/local/cuda/orin/include -DBUILD_TEGRA=True ..
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Cross Building for Automotive Linux Platforms from the DriveOS Docker containers
|
||||||
|
|
||||||
|
To build CUDA samples to the target platform from the DriveOS Docker containers, use the following instructions.
|
||||||
|
|
||||||
|
Mount the target Root Filesystem (RFS) in the container so that the CUDA cmake process has the correct paths to CUDA and other system libraries required to build the samples.
|
||||||
|
|
||||||
|
Create a temporary directory, `<temp>` is any temporary directory of your choosing, for example, you can use `/drive/temp`:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ mkdir /drive/<temp>
|
||||||
|
```
|
||||||
|
|
||||||
|
Mount the filesystem by running the following command:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ mount /drive/drive-linux/filesystem/targetfs-images/dev_nsr_desktop_ubuntu-24.04_thor_rfs.img /drive/temp
|
||||||
|
```
|
||||||
|
|
||||||
|
Configure the project by running the following cmake command:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ mkdir build && cd build
|
||||||
|
$ cmake .. -DBUILD_TEGRA=True \
|
||||||
|
-DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc \
|
||||||
|
-DCMAKE_TOOLCHAIN_FILE=../cmake/toolchains/toolchain-aarch64-linux.cmake \
|
||||||
|
-DTARGET_FS=/drive/temp \
|
||||||
|
-DCMAKE_LIBRARY_PATH=/drive/temp/usr/local/cuda-13.0/thor/lib64/ \
|
||||||
|
-DCMAKE_INCLUDE_PATH=/drive/temp/usr/local/cuda-13.0/thor/include/
|
||||||
|
```
|
||||||
|
|
||||||
|
Please note that the following libraries are not pre-installed in the DriveOS dev-nsr target filesystem:
|
||||||
|
* libdrm-dev
|
||||||
|
* Vulkan
|
||||||
|
|
||||||
|
This causes the cmake command to throw errors related to the missing files, and as a result, the related samples will not build in later steps. This issue will be addressed in a future DriveOS release.
|
||||||
|
|
||||||
|
To build the samples while ignoring the errors mentioned above, you can use `--ignore-errors`/`--keep-going`, or comment out the corresponding `add_subdirectory` command in the CMakeLists.txt in the parent folder for the samples requiring Vulkan and libdrm-dev:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ make -j$(nproc) --ignore-errors # or --keep-going
|
||||||
|
```
|
||||||
|
|
||||||
|
```
|
||||||
|
# In Samples/5_Domain_Specific/CMakeLists.txt
|
||||||
|
# add_subdirectory(simpleGL)
|
||||||
|
# add_subdirectory(simpleVulkan)
|
||||||
|
# add_subdirectory(simpleVulkanMMAP)
|
||||||
|
|
||||||
|
# In Samples/8_Platform_Specific/Tegra/CMakeLists.txt
|
||||||
|
# add_subdirectory(simpleGLES_EGLOutput)
|
||||||
|
```
|
||||||
|
|
||||||
### QNX
|
### QNX
|
||||||
|
|
||||||
Cross-compilation for QNX with CMake is supported in the CUDA 13.0 samples release and newer. An example build for
|
Cross-compilation for QNX with CMake is supported in the CUDA 13.0 samples release and newer. An example build for
|
||||||
|
|||||||
@ -224,7 +224,7 @@ __global__ void compute_gemm(const half *A, const half *B, const float *C, float
|
|||||||
// there's no such tile, all warps in this CTA exit.
|
// there's no such tile, all warps in this CTA exit.
|
||||||
for (unsigned int block_pos = blockIdx.x;; block_pos += gridDim.x) {
|
for (unsigned int block_pos = blockIdx.x;; block_pos += gridDim.x) {
|
||||||
const unsigned int block_tile_i = ((block_pos * BLOCK_ROW_TILES) / N_TILES) * (BLOCK_COL_TILES);
|
const unsigned int block_tile_i = ((block_pos * BLOCK_ROW_TILES) / N_TILES) * (BLOCK_COL_TILES);
|
||||||
const unsigned int block_tile_j = (block_pos * BLOCK_COL_TILES) % N_TILES;
|
const unsigned int block_tile_j = (block_pos * BLOCK_ROW_TILES) % N_TILES;
|
||||||
|
|
||||||
// Stop when there are no more D matrix tiles to compute in this CTA.
|
// Stop when there are no more D matrix tiles to compute in this CTA.
|
||||||
if (block_tile_i >= M_TILES) {
|
if (block_tile_i >= M_TILES) {
|
||||||
|
|||||||
@ -4,7 +4,9 @@ libNVVM and NVVM IR Samples
|
|||||||
Introduction
|
Introduction
|
||||||
------------
|
------------
|
||||||
|
|
||||||
The following samples illustrate the use of libNVVM and NVVM IR.
|
The following samples illustrate the use of libNVVM and NVVM IR. Running and
|
||||||
|
testing these samples requires an NVIDIA driver compatible with the CUDA
|
||||||
|
Toolkit being used for compilation.
|
||||||
|
|
||||||
- cuda-shared-memory - A directory containing NVVM IR programs that demonstrate
|
- cuda-shared-memory - A directory containing NVVM IR programs that demonstrate
|
||||||
CUDA 'shared' memory usage.
|
CUDA 'shared' memory usage.
|
||||||
@ -71,7 +73,7 @@ A Note About the cuda-c-linking Sample
|
|||||||
|
|
||||||
This sample requires a development package (or locally-built) LLVM library
|
This sample requires a development package (or locally-built) LLVM library
|
||||||
between versions 7 to 14 inclusive. LLVM 15 defaults to using opaque pointers,
|
between versions 7 to 14 inclusive. LLVM 15 defaults to using opaque pointers,
|
||||||
which are currently not supported in libNVVM.
|
which are not supported in libNVVM for pre-Blackwell architectures.
|
||||||
|
|
||||||
The LLVM_HOME environment variable is required for users who wish to build the
|
The LLVM_HOME environment variable is required for users who wish to build the
|
||||||
cuda-c-linking sample and have a locally built copy of LLVM that they wish to
|
cuda-c-linking sample and have a locally built copy of LLVM that they wish to
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
# Copyright (c) 1993-2023, NVIDIA CORPORATION. All rights reserved.
|
# Copyright (c) 1993-2025, NVIDIA CORPORATION. All rights reserved.
|
||||||
#
|
#
|
||||||
# Redistribution and use in source and binary forms, with or without
|
# Redistribution and use in source and binary forms, with or without
|
||||||
# modification, are permitted provided that the following conditions
|
# modification, are permitted provided that the following conditions
|
||||||
@ -29,7 +29,7 @@ if (LLVM_PACKAGE_VERSION VERSION_GREATER_EQUAL "15" OR
|
|||||||
LLVM_PACKAGE_VERSION VERSION_LESS "7")
|
LLVM_PACKAGE_VERSION VERSION_LESS "7")
|
||||||
message(STATUS "The cuda-c-linking sample is expected to build with "
|
message(STATUS "The cuda-c-linking sample is expected to build with "
|
||||||
"LLVM development libraries v7 to v14, opaque pointers are "
|
"LLVM development libraries v7 to v14, opaque pointers are "
|
||||||
"not yet supported in libNVVM.")
|
"not supported in libNVVM for pre-Blackwell architectures.")
|
||||||
return()
|
return()
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
// Copyright (c) 1993-2023, NVIDIA CORPORATION. All rights reserved.
|
// Copyright (c) 1993-2025, NVIDIA CORPORATION. All rights reserved.
|
||||||
//
|
//
|
||||||
// Redistribution and use in source and binary forms, with or without
|
// Redistribution and use in source and binary forms, with or without
|
||||||
// modification, are permitted provided that the following conditions
|
// modification, are permitted provided that the following conditions
|
||||||
@ -216,8 +216,8 @@ int main(int argc, char **argv)
|
|||||||
checkCudaErrors(cuDeviceGetAttribute(&devMajor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, device));
|
checkCudaErrors(cuDeviceGetAttribute(&devMajor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, device));
|
||||||
checkCudaErrors(cuDeviceGetAttribute(&devMinor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, device));
|
checkCudaErrors(cuDeviceGetAttribute(&devMinor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, device));
|
||||||
outs() << "Device Compute Capability: " << devMajor << "." << devMinor << "\n";
|
outs() << "Device Compute Capability: " << devMajor << "." << devMinor << "\n";
|
||||||
if (devMajor < 5) {
|
if (devMajor < 7 && devMinor < 5) {
|
||||||
errs() << "ERROR: Device 0 is not sm_50 or later.\n";
|
errs() << "ERROR: Device 0 is not sm_75 or later.\n";
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
// Copyright (c) 2014-2023, NVIDIA CORPORATION. All rights reserved.
|
// Copyright (c) 2014-2025, NVIDIA CORPORATION. All rights reserved.
|
||||||
//
|
//
|
||||||
// Redistribution and use in source and binary forms, with or without
|
// Redistribution and use in source and binary forms, with or without
|
||||||
// modification, are permitted provided that the following conditions
|
// modification, are permitted provided that the following conditions
|
||||||
@ -181,8 +181,8 @@ static CUdevice cudaDeviceInit(int *major, int *minor)
|
|||||||
checkCudaErrors(cuDeviceGetAttribute(major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevice));
|
checkCudaErrors(cuDeviceGetAttribute(major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevice));
|
||||||
checkCudaErrors(cuDeviceGetAttribute(minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevice));
|
checkCudaErrors(cuDeviceGetAttribute(minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevice));
|
||||||
printf("compute capability = %d.%d\n", *major, *minor);
|
printf("compute capability = %d.%d\n", *major, *minor);
|
||||||
if (*major < 5) {
|
if (*major < 7 && *minor < 5) {
|
||||||
fprintf(stderr, "Device 0 is not sm_50 or later\n");
|
fprintf(stderr, "Device 0 is not sm_75 or later\n");
|
||||||
exit(EXIT_FAILURE);
|
exit(EXIT_FAILURE);
|
||||||
}
|
}
|
||||||
return cuDevice;
|
return cuDevice;
|
||||||
|
|||||||
@ -22,6 +22,6 @@ interleaved.
|
|||||||
|
|
||||||
For example,
|
For example,
|
||||||
|
|
||||||
$ ptxgen a.ll -arch=compute_50 b.bc
|
$ ptxgen a.ll -arch=compute_75 b.bc
|
||||||
|
|
||||||
links a.ll and b.bc, and generates PTX code for the compute_50 architecture.
|
links a.ll and b.bc, and generates PTX code for the compute_75 architecture.
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
// Copyright (c) 1993-2023, NVIDIA CORPORATION. All rights reserved.
|
// Copyright (c) 1993-2025, NVIDIA CORPORATION. All rights reserved.
|
||||||
//
|
//
|
||||||
// Redistribution and use in source and binary forms, with or without
|
// Redistribution and use in source and binary forms, with or without
|
||||||
// modification, are permitted provided that the following conditions
|
// modification, are permitted provided that the following conditions
|
||||||
@ -74,8 +74,8 @@ static CUdevice cudaDeviceInit(int *devMajor, int *devMinor)
|
|||||||
|
|
||||||
// Obtain the device's compute capability.
|
// Obtain the device's compute capability.
|
||||||
checkCudaErrors(cuDeviceGetAttribute(devMajor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevice));
|
checkCudaErrors(cuDeviceGetAttribute(devMajor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevice));
|
||||||
if (*devMajor < 5) {
|
if (*devMajor < 7 && *devMinor < 5) {
|
||||||
fprintf(stderr, "Device 0 is not sm_50 or later\n");
|
fprintf(stderr, "Device 0 is not sm_75 or later\n");
|
||||||
exit(EXIT_FAILURE);
|
exit(EXIT_FAILURE);
|
||||||
}
|
}
|
||||||
checkCudaErrors(cuDeviceGetAttribute(devMinor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevice));
|
checkCudaErrors(cuDeviceGetAttribute(devMinor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevice));
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
// Copyright (c) 2014-2023, NVIDIA CORPORATION. All rights reserved.
|
// Copyright (c) 2014-2025, NVIDIA CORPORATION. All rights reserved.
|
||||||
//
|
//
|
||||||
// Redistribution and use in source and binary forms, with or without
|
// Redistribution and use in source and binary forms, with or without
|
||||||
// modification, are permitted provided that the following conditions
|
// modification, are permitted provided that the following conditions
|
||||||
@ -172,8 +172,8 @@ static CUdevice cudaDeviceInit(int *major, int *minor)
|
|||||||
checkCudaErrors(cuDeviceGetAttribute(major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevice));
|
checkCudaErrors(cuDeviceGetAttribute(major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevice));
|
||||||
checkCudaErrors(cuDeviceGetAttribute(minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevice));
|
checkCudaErrors(cuDeviceGetAttribute(minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevice));
|
||||||
printf("compute capability = %d.%d\n", *major, *minor);
|
printf("compute capability = %d.%d\n", *major, *minor);
|
||||||
if (*major < 5) {
|
if (*major < 7 && *minor < 5) {
|
||||||
fprintf(stderr, "Device 0 is not sm_50 or later\n");
|
fprintf(stderr, "Device 0 is not sm_75 or later\n");
|
||||||
exit(EXIT_FAILURE);
|
exit(EXIT_FAILURE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user