mirror of
https://github.com/NVIDIA/cuda-samples.git
synced 2025-08-23 23:40:31 +08:00
Merge branch 'shawnz_bug_fix' into 'master'
Bug fix for 5280038, 5277193, 5281036 and 5294720 See merge request cuda-samples/cuda-samples!112
This commit is contained in:
commit
8219570c15
20
CHANGELOG.md
20
CHANGELOG.md
@ -43,9 +43,12 @@
|
|||||||
* `4_CUDA_Libraries`
|
* `4_CUDA_Libraries`
|
||||||
* `jitLto`
|
* `jitLto`
|
||||||
* `7_libNVVM`
|
* `7_libNVVM`
|
||||||
|
* `cuda-c-linking`
|
||||||
* `device-side-launch`
|
* `device-side-launch`
|
||||||
* `simple`
|
* `simple`
|
||||||
* `uvmlite`
|
* `uvmlite`
|
||||||
|
* `8_Platform_Specific/Tegra`
|
||||||
|
* `EGLSync_CUDAEvent_Interop`
|
||||||
* Updated the sample using CUDA API "cudaGraphAddNode"/"cudaStreamGetCaptureInfo" with adding "cudaGraphEdgeData" pointer parameter as they are updated to "cudaGraphAddNode_v2"/"cudaStreamGetCaptureInfo_v3" by default in CUDA 13.0:
|
* Updated the sample using CUDA API "cudaGraphAddNode"/"cudaStreamGetCaptureInfo" with adding "cudaGraphEdgeData" pointer parameter as they are updated to "cudaGraphAddNode_v2"/"cudaStreamGetCaptureInfo_v3" by default in CUDA 13.0:
|
||||||
* `3_CUDA_Features`
|
* `3_CUDA_Features`
|
||||||
* `graphConditionalNodes`
|
* `graphConditionalNodes`
|
||||||
@ -57,6 +60,23 @@
|
|||||||
* Replaced "thrust::identity<uint>()" with "cuda::std::identity()" as it is deprecated in CUDA 13.0.
|
* Replaced "thrust::identity<uint>()" with "cuda::std::identity()" as it is deprecated in CUDA 13.0.
|
||||||
* `2_Concepts_and_Techniques`
|
* `2_Concepts_and_Techniques`
|
||||||
* `segmentationTreeThrust`
|
* `segmentationTreeThrust`
|
||||||
|
* Updated the header files and samples for the CUFFT error code update.
|
||||||
|
* Deprecated CUFFT errors:
|
||||||
|
* `CUFFT_INCOMPLETE_PARAMETER_LIST`
|
||||||
|
* `CUFFT_PARSE_ERROR`
|
||||||
|
* `CUFFT_LICENSE_ERROR`
|
||||||
|
* Newly added CUFFT errors:
|
||||||
|
* `CUFFT_MISSING_DEPENDENCY`
|
||||||
|
* `CUFFT_NVRTC_FAILURE`
|
||||||
|
* `CUFFT_NVJITLINK_FAILURE`
|
||||||
|
* `CUFFT_NVSHMEM_FAILURE`
|
||||||
|
* Header files and samples related to this change:
|
||||||
|
* `Common/helper_cuda.h`
|
||||||
|
* `4_CUDA_Libraries`
|
||||||
|
* `simpleCUFFT`
|
||||||
|
* `simpleCUFFT_2d_MGPU`
|
||||||
|
* `simpleCUFFT_MGPU`
|
||||||
|
* `simpleCUFFT_callback`
|
||||||
|
|
||||||
### CUDA 12.9
|
### CUDA 12.9
|
||||||
* Updated toolchain for cross-compilation for Tegra Linux platforms.
|
* Updated toolchain for cross-compilation for Tegra Linux platforms.
|
||||||
|
@ -147,6 +147,9 @@ static const char *_cudaGetErrorEnum(cufftResult error) {
|
|||||||
case CUFFT_NOT_IMPLEMENTED:
|
case CUFFT_NOT_IMPLEMENTED:
|
||||||
return "CUFFT_NOT_IMPLEMENTED";
|
return "CUFFT_NOT_IMPLEMENTED";
|
||||||
|
|
||||||
|
case CUFFT_NOT_SUPPORTED:
|
||||||
|
return "CUFFT_NOT_SUPPORTED";
|
||||||
|
|
||||||
case CUFFT_MISSING_DEPENDENCY:
|
case CUFFT_MISSING_DEPENDENCY:
|
||||||
return "CUFFT_MISSING_DEPENDENCY";
|
return "CUFFT_MISSING_DEPENDENCY";
|
||||||
|
|
||||||
@ -670,7 +673,10 @@ inline int _ConvertSMVer2Cores(int major, int minor) {
|
|||||||
{0x90, 128},
|
{0x90, 128},
|
||||||
{0xa0, 128},
|
{0xa0, 128},
|
||||||
{0xa1, 128},
|
{0xa1, 128},
|
||||||
|
{0xa3, 128},
|
||||||
|
{0xb0, 128},
|
||||||
{0xc0, 128},
|
{0xc0, 128},
|
||||||
|
{0xc1, 128},
|
||||||
{-1, -1}};
|
{-1, -1}};
|
||||||
|
|
||||||
int index = 0;
|
int index = 0;
|
||||||
@ -722,7 +728,10 @@ inline const char* _ConvertSMVer2ArchName(int major, int minor) {
|
|||||||
{0x90, "Hopper"},
|
{0x90, "Hopper"},
|
||||||
{0xa0, "Blackwell"},
|
{0xa0, "Blackwell"},
|
||||||
{0xa1, "Blackwell"},
|
{0xa1, "Blackwell"},
|
||||||
|
{0xa3, "Blackwell"},
|
||||||
|
{0xb0, "Blackwell"},
|
||||||
{0xc0, "Blackwell"},
|
{0xc0, "Blackwell"},
|
||||||
|
{0xc1, "Blackwell"},
|
||||||
{-1, "Graphics Device"}};
|
{-1, "Graphics Device"}};
|
||||||
|
|
||||||
int index = 0;
|
int index = 0;
|
||||||
|
@ -118,7 +118,10 @@ inline int _ConvertSMVer2CoresDRV(int major, int minor) {
|
|||||||
{0x90, 128},
|
{0x90, 128},
|
||||||
{0xa0, 128},
|
{0xa0, 128},
|
||||||
{0xa1, 128},
|
{0xa1, 128},
|
||||||
|
{0xa3, 128},
|
||||||
|
{0xb0, 128},
|
||||||
{0xc0, 128},
|
{0xc0, 128},
|
||||||
|
{0xc1, 128},
|
||||||
{-1, -1}};
|
{-1, -1}};
|
||||||
|
|
||||||
int index = 0;
|
int index = 0;
|
||||||
|
@ -23,6 +23,7 @@ set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
|||||||
|
|
||||||
if(ENABLE_CUDA_DEBUG)
|
if(ENABLE_CUDA_DEBUG)
|
||||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (may significantly affect performance on some targets)
|
||||||
|
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -maxrregcount=64") # Limit register usage to 64 for the 'big_bitonicsort' kernel
|
||||||
else()
|
else()
|
||||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -lineinfo") # add line information to all builds for debug tools (exclusive to -G option)
|
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -lineinfo") # add line information to all builds for debug tools (exclusive to -G option)
|
||||||
endif()
|
endif()
|
||||||
|
@ -182,14 +182,6 @@ int runTest(int argc, char **argv)
|
|||||||
checkCudaErrors(cudaMemcpyFromSymbol(&hostCopyOfCallbackPtr, myOwnCallbackPtr, sizeof(hostCopyOfCallbackPtr)));
|
checkCudaErrors(cudaMemcpyFromSymbol(&hostCopyOfCallbackPtr, myOwnCallbackPtr, sizeof(hostCopyOfCallbackPtr)));
|
||||||
|
|
||||||
// Now associate the load callback with the plan.
|
// Now associate the load callback with the plan.
|
||||||
cufftResult status =
|
|
||||||
cufftXtSetCallback(cb_plan, (void **)&hostCopyOfCallbackPtr, CUFFT_CB_LD_COMPLEX, (void **)&d_params);
|
|
||||||
if (status == CUFFT_LICENSE_ERROR) {
|
|
||||||
printf("This sample requires a valid license file.\n");
|
|
||||||
printf("The file was either not found, out of date, or otherwise invalid.\n");
|
|
||||||
return EXIT_WAIVED;
|
|
||||||
}
|
|
||||||
|
|
||||||
checkCudaErrors(
|
checkCudaErrors(
|
||||||
cufftXtSetCallback(cb_plan, (void **)&hostCopyOfCallbackPtr, CUFFT_CB_LD_COMPLEX, (void **)&d_params));
|
cufftXtSetCallback(cb_plan, (void **)&hostCopyOfCallbackPtr, CUFFT_CB_LD_COMPLEX, (void **)&d_params));
|
||||||
|
|
||||||
|
@ -248,7 +248,7 @@ int main(int argc, char **argv)
|
|||||||
|
|
||||||
// Create the CUDA context.
|
// Create the CUDA context.
|
||||||
CUcontext context;
|
CUcontext context;
|
||||||
checkCudaErrors(cuCtxCreate(&context, 0, device));
|
checkCudaErrors(cuCtxCreate(&context, NULL, 0, device));
|
||||||
|
|
||||||
// Create a JIT linker and generate the result CUBIN.
|
// Create a JIT linker and generate the result CUBIN.
|
||||||
CUlinkState linker;
|
CUlinkState linker;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user