mirror of https://github.com/NVIDIA/cuda-samples.git (synced 2026-01-08 10:27:49 +08:00)

Merge 13.0 changes into 13.1 dev branch

Commit 775d1b4dd3

README.md (52 changed lines)

@@ -133,6 +133,58 @@ $ cd build
$ cmake -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DCMAKE_LIBRARY_PATH=/usr/local/cuda/orin/lib64/ -DCMAKE_INCLUDE_PATH=/usr/local/cuda/orin/include -DBUILD_TEGRA=True ..
```

### Cross Building for Automotive Linux Platforms from the DriveOS Docker containers

To build the CUDA samples for the target platform from the DriveOS Docker containers, use the following instructions.

Mount the target Root Filesystem (RFS) in the container so that the CUDA cmake process has the correct paths to CUDA and the other system libraries required to build the samples.

Create a temporary directory; `<temp>` can be any directory of your choosing, for example `/drive/temp`:

```
$ mkdir /drive/<temp>
```

Mount the filesystem by running the following command:

```
$ mount /drive/drive-linux/filesystem/targetfs-images/dev_nsr_desktop_ubuntu-24.04_thor_rfs.img /drive/temp
```
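
As an optional sanity check, you can confirm that the image is mounted and that the target's CUDA directories are visible before configuring, and unmount it once the build is finished. This is only a sketch and assumes the `/drive/temp` mount point used above:

```
$ mountpoint /drive/temp       # confirms /drive/temp is an active mount point
$ ls /drive/temp/usr/local/    # the target's cuda-13.0 toolkit directory should appear here
$ umount /drive/temp           # release the image when you are done building
```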

Configure the project by running the following cmake command:

```
$ mkdir build && cd build
$ cmake .. -DBUILD_TEGRA=True \
    -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc \
    -DCMAKE_TOOLCHAIN_FILE=../cmake/toolchains/toolchain-aarch64-linux.cmake \
    -DTARGET_FS=/drive/temp \
    -DCMAKE_LIBRARY_PATH=/drive/temp/usr/local/cuda-13.0/thor/lib64/ \
    -DCMAKE_INCLUDE_PATH=/drive/temp/usr/local/cuda-13.0/thor/include/
```
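
The build step itself is not part of this hunk; assuming the default Makefile generator, a typical invocation from the same `build` directory would be:

```
$ make -j$(nproc)
# or, generator-agnostic:
$ cmake --build . --parallel $(nproc)
```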

Please note that the following libraries are not pre-installed in the DriveOS dev-nsr target filesystem:

* libdrm-dev
* Vulkan

This causes the cmake command to report errors about the missing files, and as a result the affected samples will not build in later steps. This issue will be addressed in a future DriveOS release.

To build the samples while ignoring the errors mentioned above, you can either pass `--ignore-errors`/`--keep-going` to make, or comment out the corresponding `add_subdirectory` commands in the CMakeLists.txt of the parent folder for the samples requiring Vulkan and libdrm-dev:

```
$ make -j$(nproc) --ignore-errors # or --keep-going
```

```
# In Samples/5_Domain_Specific/CMakeLists.txt
# add_subdirectory(simpleGL)
# add_subdirectory(simpleVulkan)
# add_subdirectory(simpleVulkanMMAP)

# In Samples/8_Platform_Specific/Tegra/CMakeLists.txt
# add_subdirectory(simpleGLES_EGLOutput)
```
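
If only a subset of samples is needed, another option is to build individual sample targets and skip the failing subdirectories entirely. This is a sketch that assumes each sample defines a build target named after its directory, which is the usual layout in this repository:

```
$ make -j$(nproc) deviceQuery vectorAdd
# or, through CMake:
$ cmake --build . --target deviceQuery
```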

### QNX

Cross-compilation for QNX with CMake is supported in the CUDA 13.0 samples release and newer. An example build for

@@ -224,7 +224,7 @@ __global__ void compute_gemm(const half *A, const half *B, const float *C, float
 // there's no such tile, all warps in this CTA exit.
 for (unsigned int block_pos = blockIdx.x;; block_pos += gridDim.x) {
 const unsigned int block_tile_i = ((block_pos * BLOCK_ROW_TILES) / N_TILES) * (BLOCK_COL_TILES);
-const unsigned int block_tile_j = (block_pos * BLOCK_COL_TILES) % N_TILES;
+const unsigned int block_tile_j = (block_pos * BLOCK_ROW_TILES) % N_TILES;

 // Stop when there are no more D matrix tiles to compute in this CTA.
 if (block_tile_i >= M_TILES) {

@@ -4,7 +4,9 @@ libNVVM and NVVM IR Samples
 Introduction
 ------------

-The following samples illustrate the use of libNVVM and NVVM IR.
+The following samples illustrate the use of libNVVM and NVVM IR. Running and
+testing these samples requires an NVIDIA driver compatible with the CUDA
+Toolkit being used for compilation.

 - cuda-shared-memory - A directory containing NVVM IR programs that demonstrate
   CUDA 'shared' memory usage.

@@ -71,7 +73,7 @@ A Note About the cuda-c-linking Sample

 This sample requires a development package (or locally-built) LLVM library
 between versions 7 to 14 inclusive. LLVM 15 defaults to using opaque pointers,
-which are currently not supported in libNVVM.
+which are not supported in libNVVM for pre-Blackwell architectures.

 The LLVM_HOME environment variable is required for users who wish to build the
 cuda-c-linking sample and have a locally built copy of LLVM that they wish to
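
As an illustration of the LLVM_HOME requirement described above (a minimal sketch; the install path is a placeholder for a locally built LLVM, and the version check simply confirms it falls in the supported 7-14 range):

```
$ export LLVM_HOME=$HOME/llvm-14-install       # placeholder path to a local LLVM build/install
$ "$LLVM_HOME/bin/llvm-config" --version       # expect a version between 7 and 14
```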

@@ -1,4 +1,4 @@
-# Copyright (c) 1993-2023, NVIDIA CORPORATION. All rights reserved.
+# Copyright (c) 1993-2025, NVIDIA CORPORATION. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions

@@ -29,7 +29,7 @@ if (LLVM_PACKAGE_VERSION VERSION_GREATER_EQUAL "15" OR
 LLVM_PACKAGE_VERSION VERSION_LESS "7")
 message(STATUS "The cuda-c-linking sample is expected to build with "
 "LLVM development libraries v7 to v14, opaque pointers are "
-"not yet supported in libNVVM.")
+"not supported in libNVVM for pre-Blackwell architectures.")
 return()
 endif ()

@@ -1,4 +1,4 @@
-// Copyright (c) 1993-2023, NVIDIA CORPORATION. All rights reserved.
+// Copyright (c) 1993-2025, NVIDIA CORPORATION. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions

@@ -216,8 +216,8 @@ int main(int argc, char **argv)
 checkCudaErrors(cuDeviceGetAttribute(&devMajor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, device));
 checkCudaErrors(cuDeviceGetAttribute(&devMinor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, device));
 outs() << "Device Compute Capability: " << devMajor << "." << devMinor << "\n";
-if (devMajor < 5) {
-    errs() << "ERROR: Device 0 is not sm_50 or later.\n";
+if (devMajor < 7 && devMinor < 5) {
+    errs() << "ERROR: Device 0 is not sm_75 or later.\n";
     return 1;
 }

@@ -1,4 +1,4 @@
-// Copyright (c) 2014-2023, NVIDIA CORPORATION. All rights reserved.
+// Copyright (c) 2014-2025, NVIDIA CORPORATION. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions

@@ -181,8 +181,8 @@ static CUdevice cudaDeviceInit(int *major, int *minor)
 checkCudaErrors(cuDeviceGetAttribute(major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevice));
 checkCudaErrors(cuDeviceGetAttribute(minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevice));
 printf("compute capability = %d.%d\n", *major, *minor);
-if (*major < 5) {
-    fprintf(stderr, "Device 0 is not sm_50 or later\n");
+if (*major < 7 && *minor < 5) {
+    fprintf(stderr, "Device 0 is not sm_75 or later\n");
     exit(EXIT_FAILURE);
 }
 return cuDevice;

@@ -22,6 +22,6 @@ interleaved.

 For example,

-$ ptxgen a.ll -arch=compute_50 b.bc
+$ ptxgen a.ll -arch=compute_75 b.bc

-links a.ll and b.bc, and generates PTX code for the compute_50 architecture.
+links a.ll and b.bc, and generates PTX code for the compute_75 architecture.

@@ -1,4 +1,4 @@
-// Copyright (c) 1993-2023, NVIDIA CORPORATION. All rights reserved.
+// Copyright (c) 1993-2025, NVIDIA CORPORATION. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions

@@ -74,8 +74,8 @@ static CUdevice cudaDeviceInit(int *devMajor, int *devMinor)

 // Obtain the device's compute capability.
 checkCudaErrors(cuDeviceGetAttribute(devMajor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevice));
-if (*devMajor < 5) {
-    fprintf(stderr, "Device 0 is not sm_50 or later\n");
+if (*devMajor < 7 && *devMinor < 5) {
+    fprintf(stderr, "Device 0 is not sm_75 or later\n");
     exit(EXIT_FAILURE);
 }
 checkCudaErrors(cuDeviceGetAttribute(devMinor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevice));

@@ -1,4 +1,4 @@
-// Copyright (c) 2014-2023, NVIDIA CORPORATION. All rights reserved.
+// Copyright (c) 2014-2025, NVIDIA CORPORATION. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions

@@ -172,8 +172,8 @@ static CUdevice cudaDeviceInit(int *major, int *minor)
 checkCudaErrors(cuDeviceGetAttribute(major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevice));
 checkCudaErrors(cuDeviceGetAttribute(minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevice));
 printf("compute capability = %d.%d\n", *major, *minor);
-if (*major < 5) {
-    fprintf(stderr, "Device 0 is not sm_50 or later\n");
+if (*major < 7 && *minor < 5) {
+    fprintf(stderr, "Device 0 is not sm_75 or later\n");
     exit(EXIT_FAILURE);
 }