diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..a3062bea --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +.vscode/* diff --git a/CHANGELOG.md b/CHANGELOG.md index 4bd17b62..c619c923 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ ## Changelog +### CUDA 11.6 +* Added new folder structure for samples +* Added support of Visual Studio 2022 to all samples supported on [Windows](#windows-1). +* All CUDA samples are now only available on [GitHub](https://github.com/nvidia/cuda-samples). They are no longer available via CUDA toolkit. + ### CUDA 11.5 * Added `cuDLAHybridMode`. Demonstrate usage of cuDLA in hybrid mode. * Added `cuDLAStandaloneMode`. Demonstrate usage of cuDLA in standalone mode. diff --git a/Common/helper_cuda.h b/Common/helper_cuda.h index 98a5a7b6..f6bea97a 100644 --- a/Common/helper_cuda.h +++ b/Common/helper_cuda.h @@ -666,6 +666,7 @@ inline int _ConvertSMVer2Cores(int major, int minor) { {0x80, 64}, {0x86, 128}, {0x87, 128}, + {0x90, 128}, {-1, -1}}; int index = 0; @@ -712,6 +713,8 @@ inline const char* _ConvertSMVer2ArchName(int major, int minor) { {0x75, "Turing"}, {0x80, "Ampere"}, {0x86, "Ampere"}, + {0x87, "Ampere"}, + {0x90, "Hopper"}, {-1, "Graphics Device"}}; int index = 0; diff --git a/Common/helper_cuda_drvapi.h b/Common/helper_cuda_drvapi.h index f0362d64..80979b5b 100644 --- a/Common/helper_cuda_drvapi.h +++ b/Common/helper_cuda_drvapi.h @@ -114,6 +114,7 @@ inline int _ConvertSMVer2CoresDRV(int major, int minor) { {0x80, 64}, {0x86, 128}, {0x87, 128}, + {0x90, 128}, {-1, -1}}; int index = 0; diff --git a/README.md b/README.md index 7a37e198..354fa6a4 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,12 @@ # CUDA Samples -Samples for CUDA Developers which demonstrates features in CUDA Toolkit. This version supports [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads). +Samples for CUDA Developers which demonstrates features in CUDA Toolkit. 
This version supports [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads). ## Release Notes This section describes the release notes for the CUDA Samples on GitHub only. -### CUDA 11.6 +### CUDA 11.8 * Added new folder structure for samples * Added support of Visual Studio 2022 to all samples supported on [Windows](#windows-1). * All CUDA samples are now only available on [GitHub](https://github.com/nvidia/cuda-samples). They are no longer available via CUDA toolkit. @@ -17,7 +17,7 @@ This section describes the release notes for the CUDA Samples on GitHub only. ### Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. For system requirements and installation instructions of cuda toolkit, please refer to the [Linux Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/), and the [Windows Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-microsoft-windows/index.html). ### Getting the CUDA Samples @@ -263,4 +263,4 @@ Answers to frequently asked questions about CUDA can be found at http://develope ## Attributions -* Teapot image is obtained from [Wikimedia](https://en.wikipedia.org/wiki/File:Original_Utah_Teapot.jpg) and is licensed under the Creative Commons [Attribution-Share Alike 2.0](https://creativecommons.org/licenses/by-sa/2.0/deed.en) Generic license. The image is modified for samples use cases. \ No newline at end of file +* Teapot image is obtained from [Wikimedia](https://en.wikipedia.org/wiki/File:Original_Utah_Teapot.jpg) and is licensed under the Creative Commons [Attribution-Share Alike 2.0](https://creativecommons.org/licenses/by-sa/2.0/deed.en) Generic license. The image is modified for samples use cases. 
diff --git a/Samples/0_Introduction/UnifiedMemoryStreams/Makefile b/Samples/0_Introduction/UnifiedMemoryStreams/Makefile index ba7b78b4..e244dab5 100644 --- a/Samples/0_Introduction/UnifiedMemoryStreams/Makefile +++ b/Samples/0_Introduction/UnifiedMemoryStreams/Makefile @@ -318,9 +318,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/UnifiedMemoryStreams/NsightEclipse.xml b/Samples/0_Introduction/UnifiedMemoryStreams/NsightEclipse.xml index 033c1c50..744caa12 100644 --- a/Samples/0_Introduction/UnifiedMemoryStreams/NsightEclipse.xml +++ b/Samples/0_Introduction/UnifiedMemoryStreams/NsightEclipse.xml @@ -6,11 +6,11 @@ cudaStreamDestroy cudaFree cudaMallocManaged - cudaStreamCreate - cudaDeviceSynchronize cudaStreamAttachMemAsync cudaSetDevice + cudaDeviceSynchronize cudaStreamSynchronize + cudaStreamCreate cudaGetDeviceProperties @@ -70,6 +70,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/0_Introduction/UnifiedMemoryStreams/README.md b/Samples/0_Introduction/UnifiedMemoryStreams/README.md index 347649da..417cf3a0 100644 --- a/Samples/0_Introduction/UnifiedMemoryStreams/README.md +++ b/Samples/0_Introduction/UnifiedMemoryStreams/README.md @@ -10,7 +10,7 @@ CUDA Systems Integration, OpenMP, CUBLAS, Multithreading, Unified Memory, CUDA S ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) 
[SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaStreamDestroy, cudaFree, cudaMallocManaged, cudaStreamCreate, cudaDeviceSynchronize, cudaStreamAttachMemAsync, cudaSetDevice, cudaStreamSynchronize, cudaGetDeviceProperties +cudaStreamDestroy, cudaFree, cudaMallocManaged, cudaStreamAttachMemAsync, cudaSetDevice, cudaDeviceSynchronize, cudaStreamSynchronize, cudaStreamCreate, cudaGetDeviceProperties ## Dependencies needed to build/run [OpenMP](../../../README.md#openmp), [UVM](../../../README.md#uvm), [CUBLAS](../../../README.md#cublas) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams_vs2017.vcxproj b/Samples/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams_vs2017.vcxproj index e5e99aac..9680c777 100644 --- a/Samples/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams_vs2017.vcxproj +++ b/Samples/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/UnifiedMemoryStreams.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -108,6 +108,6 @@ - + diff --git a/Samples/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams_vs2019.vcxproj b/Samples/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams_vs2019.vcxproj index 75e8d36d..866e26dc 100644 --- a/Samples/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams_vs2019.vcxproj +++ b/Samples/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/UnifiedMemoryStreams.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams_vs2022.vcxproj b/Samples/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams_vs2022.vcxproj index 
ba409655..07478748 100644 --- a/Samples/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams_vs2022.vcxproj +++ b/Samples/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/UnifiedMemoryStreams.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/0_Introduction/asyncAPI/Makefile b/Samples/0_Introduction/asyncAPI/Makefile index 885bbc8e..71bb4794 100644 --- a/Samples/0_Introduction/asyncAPI/Makefile +++ b/Samples/0_Introduction/asyncAPI/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/asyncAPI/NsightEclipse.xml b/Samples/0_Introduction/asyncAPI/NsightEclipse.xml index 6d0bbc62..d823ac8a 100644 --- a/Samples/0_Introduction/asyncAPI/NsightEclipse.xml +++ b/Samples/0_Introduction/asyncAPI/NsightEclipse.xml @@ -3,21 +3,21 @@ asyncAPI - cudaMemset + cudaProfilerStop + cudaMalloc + cudaMemcpyAsync cudaFree - cudaEventRecord cudaMallocHost cudaProfilerStart - cudaEventCreate - cudaEventElapsedTime cudaDeviceSynchronize + cudaEventRecord cudaFreeHost - cudaMalloc - cudaEventQuery - cudaProfilerStop + cudaMemset cudaEventDestroy - cudaMemcpyAsync + cudaEventQuery + cudaEventElapsedTime cudaGetDeviceProperties + cudaEventCreate whole @@ -59,6 +59,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git 
a/Samples/0_Introduction/asyncAPI/README.md b/Samples/0_Introduction/asyncAPI/README.md index 81da4efc..7f4f3b42 100644 --- a/Samples/0_Introduction/asyncAPI/README.md +++ b/Samples/0_Introduction/asyncAPI/README.md @@ -10,7 +10,7 @@ Asynchronous Data Transfers, CUDA Streams and Events ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemset, cudaFree, cudaEventRecord, cudaMallocHost, cudaProfilerStart, cudaEventCreate, cudaEventElapsedTime, cudaDeviceSynchronize, cudaFreeHost, cudaMalloc, 
cudaEventQuery, cudaProfilerStop, cudaEventDestroy, cudaMemcpyAsync, cudaGetDeviceProperties +cudaProfilerStop, cudaMalloc, cudaMemcpyAsync, cudaFree, cudaMallocHost, cudaProfilerStart, cudaDeviceSynchronize, cudaEventRecord, cudaFreeHost, cudaMemset, cudaEventDestroy, cudaEventQuery, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/0_Introduction/asyncAPI/asyncAPI_vs2017.vcxproj b/Samples/0_Introduction/asyncAPI/asyncAPI_vs2017.vcxproj index f2de8d87..ccea698d 100644 --- a/Samples/0_Introduction/asyncAPI/asyncAPI_vs2017.vcxproj +++ b/Samples/0_Introduction/asyncAPI/asyncAPI_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/asyncAPI.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/asyncAPI/asyncAPI_vs2019.vcxproj b/Samples/0_Introduction/asyncAPI/asyncAPI_vs2019.vcxproj index be2679b8..56489567 100644 --- a/Samples/0_Introduction/asyncAPI/asyncAPI_vs2019.vcxproj +++ b/Samples/0_Introduction/asyncAPI/asyncAPI_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/asyncAPI.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/asyncAPI/asyncAPI_vs2022.vcxproj b/Samples/0_Introduction/asyncAPI/asyncAPI_vs2022.vcxproj index 9d98bdff..c4b23b8f 100644 --- a/Samples/0_Introduction/asyncAPI/asyncAPI_vs2022.vcxproj +++ b/Samples/0_Introduction/asyncAPI/asyncAPI_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/asyncAPI.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/c++11_cuda/Makefile b/Samples/0_Introduction/c++11_cuda/Makefile index f70e1ad6..d4c77f61 100644 --- a/Samples/0_Introduction/c++11_cuda/Makefile +++ b/Samples/0_Introduction/c++11_cuda/Makefile @@ -310,9 +310,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) @@ -363,7 +363,6 @@ run: build $(EXEC) ./c++11_cuda testrun: build - $(EXEC) ./c++11_cuda --dummy-test-param clean: rm -f c++11_cuda c++11_cuda.o diff --git a/Samples/0_Introduction/c++11_cuda/NsightEclipse.xml b/Samples/0_Introduction/c++11_cuda/NsightEclipse.xml index e9acaddc..ccb26ce1 100644 --- a/Samples/0_Introduction/c++11_cuda/NsightEclipse.xml +++ b/Samples/0_Introduction/c++11_cuda/NsightEclipse.xml @@ -7,9 
+7,9 @@ cudaMalloc + cudaMemcpy cudaMemset cudaFree - cudaMemcpy whole @@ -31,9 +31,6 @@ true c++11_cuda.cu - - --dummy-test-param - CPP11 @@ -54,6 +51,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/0_Introduction/c++11_cuda/README.md b/Samples/0_Introduction/c++11_cuda/README.md index 0ff9f23e..a889fb7c 100644 --- a/Samples/0_Introduction/c++11_cuda/README.md +++ b/Samples/0_Introduction/c++11_cuda/README.md @@ -10,7 +10,7 @@ CPP11 CUDA ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) 
-cudaMalloc, cudaMemset, cudaFree, cudaMemcpy +cudaMalloc, cudaMemcpy, cudaMemset, cudaFree ## Dependencies needed to build/run [CPP11](../../../README.md#cpp11) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/0_Introduction/c++11_cuda/c++11_cuda_vs2017.vcxproj b/Samples/0_Introduction/c++11_cuda/c++11_cuda_vs2017.vcxproj index 33d8ff13..705e575c 100644 --- a/Samples/0_Introduction/c++11_cuda/c++11_cuda_vs2017.vcxproj +++ b/Samples/0_Introduction/c++11_cuda/c++11_cuda_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/c++11_cuda.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/c++11_cuda/c++11_cuda_vs2019.vcxproj b/Samples/0_Introduction/c++11_cuda/c++11_cuda_vs2019.vcxproj index 3d1bc27d..e4e93dee 100644 --- a/Samples/0_Introduction/c++11_cuda/c++11_cuda_vs2019.vcxproj +++ b/Samples/0_Introduction/c++11_cuda/c++11_cuda_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/c++11_cuda.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/c++11_cuda/c++11_cuda_vs2022.vcxproj b/Samples/0_Introduction/c++11_cuda/c++11_cuda_vs2022.vcxproj index 9bf5532c..8133b615 100644 --- a/Samples/0_Introduction/c++11_cuda/c++11_cuda_vs2022.vcxproj +++ b/Samples/0_Introduction/c++11_cuda/c++11_cuda_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/c++11_cuda.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/clock/Makefile b/Samples/0_Introduction/clock/Makefile index dd832757..df4722cb 100644 --- a/Samples/0_Introduction/clock/Makefile +++ b/Samples/0_Introduction/clock/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/clock/NsightEclipse.xml b/Samples/0_Introduction/clock/NsightEclipse.xml index eee903b9..6d8cfb09 100644 --- a/Samples/0_Introduction/clock/NsightEclipse.xml +++ b/Samples/0_Introduction/clock/NsightEclipse.xml @@ -4,8 +4,8 @@ clock cudaMalloc - cudaFree cudaMemcpy + cudaFree whole @@ -47,6 +47,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git 
a/Samples/0_Introduction/clock/README.md b/Samples/0_Introduction/clock/README.md index 98ffd744..11f9afd4 100644 --- a/Samples/0_Introduction/clock/README.md +++ b/Samples/0_Introduction/clock/README.md @@ -10,7 +10,7 @@ Performance Strategies ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMalloc, cudaFree, cudaMemcpy +cudaMalloc, cudaMemcpy, cudaFree ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/0_Introduction/clock/clock_vs2017.vcxproj b/Samples/0_Introduction/clock/clock_vs2017.vcxproj index ba348ad3..2350f365 100644 --- a/Samples/0_Introduction/clock/clock_vs2017.vcxproj +++ b/Samples/0_Introduction/clock/clock_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/clock.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/clock/clock_vs2019.vcxproj b/Samples/0_Introduction/clock/clock_vs2019.vcxproj index a20c90b7..6649beca 100644 --- a/Samples/0_Introduction/clock/clock_vs2019.vcxproj +++ b/Samples/0_Introduction/clock/clock_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/clock.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/clock/clock_vs2022.vcxproj b/Samples/0_Introduction/clock/clock_vs2022.vcxproj index 10e92347..4cf6b895 100644 --- a/Samples/0_Introduction/clock/clock_vs2022.vcxproj +++ b/Samples/0_Introduction/clock/clock_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/clock.exe - 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/clock_nvrtc/README.md b/Samples/0_Introduction/clock_nvrtc/README.md index 8f16c6d2..5e1dbf0f 100644 --- a/Samples/0_Introduction/clock_nvrtc/README.md +++ b/Samples/0_Introduction/clock_nvrtc/README.md @@ -10,7 +10,7 @@ Performance Strategies, Runtime Compilation ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 
](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,7 +23,7 @@ x86_64, ppc64le, aarch64 ## CUDA APIs involved ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html) -cuModuleGetFunction, cuMemAlloc, cuLaunchKernel, cuCtxSynchronize, cuMemFree, cuMemcpyDtoH, cuMemcpyHtoD +cuMemcpyDtoH, cuLaunchKernel, cuMemcpyHtoD, cuCtxSynchronize, cuMemAlloc, cuMemFree, cuModuleGetFunction ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) cudaBlockSize, cudaGridSize @@ -33,7 +33,7 @@ cudaBlockSize, cudaGridSize ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/0_Introduction/clock_nvrtc/clock_nvrtc_vs2017.vcxproj b/Samples/0_Introduction/clock_nvrtc/clock_nvrtc_vs2017.vcxproj index 03b11e36..ec582a9f 100644 --- a/Samples/0_Introduction/clock_nvrtc/clock_nvrtc_vs2017.vcxproj +++ b/Samples/0_Introduction/clock_nvrtc/clock_nvrtc_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/clock_nvrtc/clock_nvrtc_vs2019.vcxproj b/Samples/0_Introduction/clock_nvrtc/clock_nvrtc_vs2019.vcxproj index 80f3f59d..e5b93b60 100644 --- a/Samples/0_Introduction/clock_nvrtc/clock_nvrtc_vs2019.vcxproj +++ b/Samples/0_Introduction/clock_nvrtc/clock_nvrtc_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/clock_nvrtc/clock_nvrtc_vs2022.vcxproj b/Samples/0_Introduction/clock_nvrtc/clock_nvrtc_vs2022.vcxproj index 0cf812f4..825d8e05 100644 --- a/Samples/0_Introduction/clock_nvrtc/clock_nvrtc_vs2022.vcxproj +++ b/Samples/0_Introduction/clock_nvrtc/clock_nvrtc_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/concurrentKernels/Makefile b/Samples/0_Introduction/concurrentKernels/Makefile index 0073ee5e..e6e4e241 100644 --- a/Samples/0_Introduction/concurrentKernels/Makefile +++ b/Samples/0_Introduction/concurrentKernels/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/concurrentKernels/NsightEclipse.xml b/Samples/0_Introduction/concurrentKernels/NsightEclipse.xml index dd564b83..edfb7ff5 100644 --- a/Samples/0_Introduction/concurrentKernels/NsightEclipse.xml +++ b/Samples/0_Introduction/concurrentKernels/NsightEclipse.xml @@ -3,22 +3,22 @@ concurrentKernels - cudaStreamWaitEvent cudaStreamDestroy - 
cudaFree - cudaEventRecord - cudaMallocHost - cudaStreamCreate - cudaEventCreate - cudaEventElapsedTime - cudaEventSynchronize - cudaFreeHost cudaMalloc - cudaEventCreateWithFlags - cudaEventDestroy cudaMemcpyAsync - cudaGetDeviceProperties + cudaFree + cudaMallocHost + cudaEventCreateWithFlags + cudaEventSynchronize + cudaEventRecord + cudaFreeHost cudaGetDevice + cudaStreamWaitEvent + cudaEventDestroy + cudaEventElapsedTime + cudaStreamCreate + cudaGetDeviceProperties + cudaEventCreate whole @@ -57,6 +57,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/0_Introduction/concurrentKernels/README.md b/Samples/0_Introduction/concurrentKernels/README.md index 96816ffa..f83e3bdd 100644 --- a/Samples/0_Introduction/concurrentKernels/README.md +++ b/Samples/0_Introduction/concurrentKernels/README.md @@ -10,7 +10,7 @@ Performance Strategies ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 
](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaStreamWaitEvent, cudaStreamDestroy, cudaFree, cudaEventRecord, cudaMallocHost, cudaStreamCreate, cudaEventCreate, cudaEventElapsedTime, cudaEventSynchronize, cudaFreeHost, cudaMalloc, cudaEventCreateWithFlags, cudaEventDestroy, cudaMemcpyAsync, cudaGetDeviceProperties, cudaGetDevice +cudaStreamDestroy, cudaMalloc, cudaMemcpyAsync, cudaFree, cudaMallocHost, cudaEventCreateWithFlags, cudaEventSynchronize, cudaEventRecord, cudaFreeHost, cudaGetDevice, cudaStreamWaitEvent, cudaEventDestroy, cudaEventElapsedTime, cudaStreamCreate, cudaGetDeviceProperties, cudaEventCreate ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/0_Introduction/concurrentKernels/concurrentKernels_vs2017.vcxproj b/Samples/0_Introduction/concurrentKernels/concurrentKernels_vs2017.vcxproj index bb646789..59cad7eb 100644 --- a/Samples/0_Introduction/concurrentKernels/concurrentKernels_vs2017.vcxproj +++ b/Samples/0_Introduction/concurrentKernels/concurrentKernels_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/concurrentKernels.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/concurrentKernels/concurrentKernels_vs2019.vcxproj b/Samples/0_Introduction/concurrentKernels/concurrentKernels_vs2019.vcxproj index 2830f310..faee059d 100644 --- a/Samples/0_Introduction/concurrentKernels/concurrentKernels_vs2019.vcxproj +++ b/Samples/0_Introduction/concurrentKernels/concurrentKernels_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/concurrentKernels.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/concurrentKernels/concurrentKernels_vs2022.vcxproj b/Samples/0_Introduction/concurrentKernels/concurrentKernels_vs2022.vcxproj index a528a3fb..abf2d5e0 100644 --- 
a/Samples/0_Introduction/concurrentKernels/concurrentKernels_vs2022.vcxproj +++ b/Samples/0_Introduction/concurrentKernels/concurrentKernels_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/concurrentKernels.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/cppIntegration/Makefile b/Samples/0_Introduction/cppIntegration/Makefile index 19301286..ebe106e2 100644 --- a/Samples/0_Introduction/cppIntegration/Makefile +++ b/Samples/0_Introduction/cppIntegration/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/cppIntegration/NsightEclipse.xml b/Samples/0_Introduction/cppIntegration/NsightEclipse.xml index 9f70719b..9b5f9b41 100644 --- a/Samples/0_Introduction/cppIntegration/NsightEclipse.xml +++ b/Samples/0_Introduction/cppIntegration/NsightEclipse.xml @@ -4,8 +4,8 @@ cppIntegration cudaMalloc - cudaFree cudaMemcpy + cudaFree whole @@ -41,6 +41,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/0_Introduction/cppIntegration/README.md b/Samples/0_Introduction/cppIntegration/README.md index 2ba64fd7..4ac48bcf 100644 --- a/Samples/0_Introduction/cppIntegration/README.md +++ b/Samples/0_Introduction/cppIntegration/README.md @@ -10,7 +10,7 @@ CPP-CUDA Integration ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 
3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMalloc, cudaFree, cudaMemcpy +cudaMalloc, cudaMemcpy, cudaFree ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/0_Introduction/cppIntegration/cppIntegration_vs2017.vcxproj b/Samples/0_Introduction/cppIntegration/cppIntegration_vs2017.vcxproj index e6846211..4070ae91 100644 --- a/Samples/0_Introduction/cppIntegration/cppIntegration_vs2017.vcxproj +++ b/Samples/0_Introduction/cppIntegration/cppIntegration_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/cppIntegration.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -109,6 +109,6 @@ - + diff --git a/Samples/0_Introduction/cppIntegration/cppIntegration_vs2019.vcxproj b/Samples/0_Introduction/cppIntegration/cppIntegration_vs2019.vcxproj index a57aa19b..67d587aa 100644 --- a/Samples/0_Introduction/cppIntegration/cppIntegration_vs2019.vcxproj +++ b/Samples/0_Introduction/cppIntegration/cppIntegration_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/cppIntegration.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -105,6 +105,6 @@ - + diff --git a/Samples/0_Introduction/cppIntegration/cppIntegration_vs2022.vcxproj b/Samples/0_Introduction/cppIntegration/cppIntegration_vs2022.vcxproj index 26e82803..8ed0d991 100644 --- a/Samples/0_Introduction/cppIntegration/cppIntegration_vs2022.vcxproj +++ 
b/Samples/0_Introduction/cppIntegration/cppIntegration_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/cppIntegration.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -105,6 +105,6 @@ - + diff --git a/Samples/0_Introduction/cppOverload/Makefile b/Samples/0_Introduction/cppOverload/Makefile index cfd5ec9c..a76aca05 100644 --- a/Samples/0_Introduction/cppOverload/Makefile +++ b/Samples/0_Introduction/cppOverload/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/cppOverload/NsightEclipse.xml b/Samples/0_Introduction/cppOverload/NsightEclipse.xml index 8c5b84a7..9ad898be 100644 --- a/Samples/0_Introduction/cppOverload/NsightEclipse.xml +++ b/Samples/0_Introduction/cppOverload/NsightEclipse.xml @@ -3,17 +3,17 @@ cppOverload - cudaFree + cudaMemcpy cudaFuncSetCacheConfig + cudaFree cudaMallocHost - cudaFuncGetAttributes - cudaGetDeviceCount + cudaSetDevice + cudaGetDeviceProperties cudaDeviceSynchronize cudaFreeHost cudaMalloc - cudaSetDevice - cudaMemcpy - cudaGetDeviceProperties + cudaFuncGetAttributes + cudaGetDeviceCount whole @@ -52,6 +52,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/0_Introduction/cppOverload/README.md b/Samples/0_Introduction/cppOverload/README.md index 16b6adc1..bc583bfd 100644 --- a/Samples/0_Introduction/cppOverload/README.md +++ 
b/Samples/0_Introduction/cppOverload/README.md @@ -10,7 +10,7 @@ C++ Function Overloading, CUDA Streams and Events ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaFuncSetCacheConfig, cudaMallocHost, cudaFuncGetAttributes, cudaGetDeviceCount, cudaDeviceSynchronize, cudaFreeHost, cudaMalloc, cudaSetDevice, cudaMemcpy, cudaGetDeviceProperties +cudaMemcpy, cudaFuncSetCacheConfig, cudaFree, cudaMallocHost, cudaSetDevice, cudaGetDeviceProperties, cudaDeviceSynchronize, cudaFreeHost, 
cudaMalloc, cudaFuncGetAttributes, cudaGetDeviceCount ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/0_Introduction/cppOverload/cppOverload_vs2017.vcxproj b/Samples/0_Introduction/cppOverload/cppOverload_vs2017.vcxproj index 0a082724..4adb6ea7 100644 --- a/Samples/0_Introduction/cppOverload/cppOverload_vs2017.vcxproj +++ b/Samples/0_Introduction/cppOverload/cppOverload_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/cppOverload.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/cppOverload/cppOverload_vs2019.vcxproj b/Samples/0_Introduction/cppOverload/cppOverload_vs2019.vcxproj index ba8bee01..040f08cf 100644 --- a/Samples/0_Introduction/cppOverload/cppOverload_vs2019.vcxproj +++ b/Samples/0_Introduction/cppOverload/cppOverload_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/cppOverload.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git 
a/Samples/0_Introduction/cppOverload/cppOverload_vs2022.vcxproj b/Samples/0_Introduction/cppOverload/cppOverload_vs2022.vcxproj index 4e849ea5..a9592ff8 100644 --- a/Samples/0_Introduction/cppOverload/cppOverload_vs2022.vcxproj +++ b/Samples/0_Introduction/cppOverload/cppOverload_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/cppOverload.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/cudaOpenMP/Makefile b/Samples/0_Introduction/cudaOpenMP/Makefile index 476ddbc9..277357e2 100644 --- a/Samples/0_Introduction/cudaOpenMP/Makefile +++ b/Samples/0_Introduction/cudaOpenMP/Makefile @@ -321,9 +321,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/cudaOpenMP/README.md b/Samples/0_Introduction/cudaOpenMP/README.md index 9f446d04..5a57d918 100644 --- a/Samples/0_Introduction/cudaOpenMP/README.md +++ b/Samples/0_Introduction/cudaOpenMP/README.md @@ -10,7 +10,7 @@ CUDA Systems Integration, OpenMP, Multithreading ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 
](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemset, cudaFree, cudaGetDeviceCount, cudaSetDevice, cudaMalloc, cudaGetLastError, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties, cudaGetDevice +cudaMemcpy, cudaGetErrorString, cudaFree, cudaGetLastError, cudaSetDevice, cudaGetDeviceCount, cudaGetDevice, cudaMemset, cudaMalloc, cudaGetDeviceProperties ## Dependencies needed to build/run [OpenMP](../../../README.md#openmp) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/0_Introduction/cudaOpenMP/cudaOpenMP_vs2017.vcxproj b/Samples/0_Introduction/cudaOpenMP/cudaOpenMP_vs2017.vcxproj index 57636e0c..b6a822e0 100644 --- a/Samples/0_Introduction/cudaOpenMP/cudaOpenMP_vs2017.vcxproj +++ b/Samples/0_Introduction/cudaOpenMP/cudaOpenMP_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/cudaOpenMP.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -108,6 +108,6 @@ - + diff --git a/Samples/0_Introduction/cudaOpenMP/cudaOpenMP_vs2019.vcxproj b/Samples/0_Introduction/cudaOpenMP/cudaOpenMP_vs2019.vcxproj index 5e7d2b50..991ca21e 100644 --- a/Samples/0_Introduction/cudaOpenMP/cudaOpenMP_vs2019.vcxproj +++ b/Samples/0_Introduction/cudaOpenMP/cudaOpenMP_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/cudaOpenMP.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/0_Introduction/cudaOpenMP/cudaOpenMP_vs2022.vcxproj b/Samples/0_Introduction/cudaOpenMP/cudaOpenMP_vs2022.vcxproj index 8628c83b..adf14793 100644 --- a/Samples/0_Introduction/cudaOpenMP/cudaOpenMP_vs2022.vcxproj +++ b/Samples/0_Introduction/cudaOpenMP/cudaOpenMP_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ 
$(OutDir)/cudaOpenMP.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/0_Introduction/fp16ScalarProduct/Makefile b/Samples/0_Introduction/fp16ScalarProduct/Makefile index c80fe84a..5dda1a89 100644 --- a/Samples/0_Introduction/fp16ScalarProduct/Makefile +++ b/Samples/0_Introduction/fp16ScalarProduct/Makefile @@ -285,9 +285,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 60 61 70 75 80 86 +SMS ?= 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/fp16ScalarProduct/NsightEclipse.xml b/Samples/0_Introduction/fp16ScalarProduct/NsightEclipse.xml index fed67440..045bce43 100644 --- a/Samples/0_Introduction/fp16ScalarProduct/NsightEclipse.xml +++ b/Samples/0_Introduction/fp16ScalarProduct/NsightEclipse.xml @@ -3,11 +3,11 @@ fp16ScalarProduct + cudaMemcpy cudaFree cudaMallocHost cudaFreeHost cudaMalloc - cudaMemcpy cudaGetDeviceProperties @@ -44,6 +44,7 @@ sm80 sm86 sm87 + sm90 arm diff --git a/Samples/0_Introduction/fp16ScalarProduct/README.md b/Samples/0_Introduction/fp16ScalarProduct/README.md index 3875a40a..4aa2b89c 100644 --- a/Samples/0_Introduction/fp16ScalarProduct/README.md +++ b/Samples/0_Introduction/fp16ScalarProduct/README.md @@ -10,7 +10,7 @@ CUDA Runtime API ## Supported SM Architectures -[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) 
[SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaMallocHost, cudaFreeHost, cudaMalloc, cudaMemcpy, cudaGetDeviceProperties +cudaMemcpy, cudaFree, cudaMallocHost, cudaFreeHost, cudaMalloc, cudaGetDeviceProperties ## Dependencies needed to build/run [FP16](../../../README.md#fp16) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/0_Introduction/fp16ScalarProduct/fp16ScalarProduct_vs2017.vcxproj b/Samples/0_Introduction/fp16ScalarProduct/fp16ScalarProduct_vs2017.vcxproj index f1199a8e..c4dbdc75 100644 --- a/Samples/0_Introduction/fp16ScalarProduct/fp16ScalarProduct_vs2017.vcxproj +++ b/Samples/0_Introduction/fp16ScalarProduct/fp16ScalarProduct_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/fp16ScalarProduct.exe - compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/fp16ScalarProduct/fp16ScalarProduct_vs2019.vcxproj b/Samples/0_Introduction/fp16ScalarProduct/fp16ScalarProduct_vs2019.vcxproj index bcd5c50c..0b9a749f 100644 --- a/Samples/0_Introduction/fp16ScalarProduct/fp16ScalarProduct_vs2019.vcxproj +++ b/Samples/0_Introduction/fp16ScalarProduct/fp16ScalarProduct_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/fp16ScalarProduct.exe - compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/fp16ScalarProduct/fp16ScalarProduct_vs2022.vcxproj b/Samples/0_Introduction/fp16ScalarProduct/fp16ScalarProduct_vs2022.vcxproj index c316ee62..ee4258a8 100644 --- a/Samples/0_Introduction/fp16ScalarProduct/fp16ScalarProduct_vs2022.vcxproj +++ b/Samples/0_Introduction/fp16ScalarProduct/fp16ScalarProduct_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/fp16ScalarProduct.exe - 
compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/matrixMul/Makefile b/Samples/0_Introduction/matrixMul/Makefile index a8b38ccd..a4d336b5 100644 --- a/Samples/0_Introduction/matrixMul/Makefile +++ b/Samples/0_Introduction/matrixMul/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/matrixMul/NsightEclipse.xml b/Samples/0_Introduction/matrixMul/NsightEclipse.xml index e90b6c7e..3f517967 100644 --- a/Samples/0_Introduction/matrixMul/NsightEclipse.xml +++ b/Samples/0_Introduction/matrixMul/NsightEclipse.xml @@ -3,20 +3,20 @@ matrixMul + cudaStreamCreateWithFlags + cudaProfilerStop + cudaMalloc cudaFree - cudaEventRecord cudaMallocHost cudaProfilerStart - cudaEventCreate - cudaEventElapsedTime cudaEventSynchronize + cudaEventRecord cudaFreeHost - cudaMalloc - cudaProfilerStop - cudaStreamCreateWithFlags - cudaEventDestroy cudaStreamSynchronize + cudaEventDestroy + cudaEventElapsedTime cudaMemcpyAsync + cudaEventCreate whole @@ -56,6 +56,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/0_Introduction/matrixMul/README.md b/Samples/0_Introduction/matrixMul/README.md index c558141a..b0e121b2 100644 --- a/Samples/0_Introduction/matrixMul/README.md +++ b/Samples/0_Introduction/matrixMul/README.md @@ -10,7 +10,7 @@ CUDA Runtime API, Linear Algebra ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 
](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l, aarch64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaEventRecord, cudaMallocHost, cudaProfilerStart, cudaEventCreate, cudaEventElapsedTime, cudaEventSynchronize, cudaFreeHost, cudaMalloc, cudaProfilerStop, cudaStreamCreateWithFlags, cudaEventDestroy, cudaStreamSynchronize, cudaMemcpyAsync +cudaStreamCreateWithFlags, cudaProfilerStop, cudaMalloc, cudaFree, cudaMallocHost, cudaProfilerStart, cudaEventSynchronize, cudaEventRecord, cudaFreeHost, cudaStreamSynchronize, cudaEventDestroy, cudaEventElapsedTime, cudaMemcpyAsync, cudaEventCreate ## Prerequisites -Download and install the [CUDA Toolkit 
11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/0_Introduction/matrixMul/matrixMul_vs2017.vcxproj b/Samples/0_Introduction/matrixMul/matrixMul_vs2017.vcxproj index 5bc23eb0..95f6a03a 100644 --- a/Samples/0_Introduction/matrixMul/matrixMul_vs2017.vcxproj +++ b/Samples/0_Introduction/matrixMul/matrixMul_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/matrixMul.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/matrixMul/matrixMul_vs2019.vcxproj b/Samples/0_Introduction/matrixMul/matrixMul_vs2019.vcxproj index 7373d385..375f668a 100644 --- a/Samples/0_Introduction/matrixMul/matrixMul_vs2019.vcxproj +++ b/Samples/0_Introduction/matrixMul/matrixMul_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/matrixMul.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/matrixMul/matrixMul_vs2022.vcxproj b/Samples/0_Introduction/matrixMul/matrixMul_vs2022.vcxproj index 36e7c4e9..e406cc03 100644 --- 
a/Samples/0_Introduction/matrixMul/matrixMul_vs2022.vcxproj +++ b/Samples/0_Introduction/matrixMul/matrixMul_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/matrixMul.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/matrixMulDrv/Makefile b/Samples/0_Introduction/matrixMulDrv/Makefile index 794345b6..83476982 100644 --- a/Samples/0_Introduction/matrixMulDrv/Makefile +++ b/Samples/0_Introduction/matrixMulDrv/Makefile @@ -283,9 +283,9 @@ FATBIN_FILE := matrixMul_kernel${TARGET_SIZE}.fatbin # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/matrixMulDrv/README.md b/Samples/0_Introduction/matrixMulDrv/README.md index 804e7d81..682fb940 100644 --- a/Samples/0_Introduction/matrixMulDrv/README.md +++ b/Samples/0_Introduction/matrixMulDrv/README.md @@ -10,7 +10,7 @@ CUDA Driver API, Matrix Multiply ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 
](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l, aarch64 ## CUDA APIs involved ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html) -cuModuleGetFunction, cuMemcpyHtoD, cuModuleLoadData, cuCtxCreate, cuLaunchKernel, cuDeviceGetName, cuMemAlloc, cuOccupancyMaxPotentialBlockSize, cuDeviceTotalMem, cuMemFree, cuMemcpyDtoH, cuCtxDestroy, cuDeviceGetAttribute +cuMemcpyDtoH, cuLaunchKernel, cuMemcpyHtoD, cuDeviceGetName, cuDeviceTotalMem, cuDeviceGetAttribute, cuModuleLoadData, cuOccupancyMaxPotentialBlockSize, cuMemAlloc, cuMemFree, cuCtxDestroy, cuModuleGetFunction, cuCtxCreate ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/0_Introduction/matrixMulDrv/matrixMulDrv_vs2017.vcxproj b/Samples/0_Introduction/matrixMulDrv/matrixMulDrv_vs2017.vcxproj index ff911452..73998761 100644 --- a/Samples/0_Introduction/matrixMulDrv/matrixMulDrv_vs2017.vcxproj +++ b/Samples/0_Introduction/matrixMulDrv/matrixMulDrv_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/matrixMulDrv.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -111,6 +111,6 @@ - + diff --git a/Samples/0_Introduction/matrixMulDrv/matrixMulDrv_vs2019.vcxproj b/Samples/0_Introduction/matrixMulDrv/matrixMulDrv_vs2019.vcxproj index 1f6b88ad..0805c97c 100644 --- a/Samples/0_Introduction/matrixMulDrv/matrixMulDrv_vs2019.vcxproj +++ b/Samples/0_Introduction/matrixMulDrv/matrixMulDrv_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/matrixMulDrv.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/matrixMulDrv/matrixMulDrv_vs2022.vcxproj b/Samples/0_Introduction/matrixMulDrv/matrixMulDrv_vs2022.vcxproj index e92ce9ce..a82bb699 100644 --- a/Samples/0_Introduction/matrixMulDrv/matrixMulDrv_vs2022.vcxproj +++ b/Samples/0_Introduction/matrixMulDrv/matrixMulDrv_vs2022.vcxproj @@ -34,7 
+34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/matrixMulDrv.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/matrixMulDynlinkJIT/README.md b/Samples/0_Introduction/matrixMulDynlinkJIT/README.md index 18350964..657811d3 100644 --- a/Samples/0_Introduction/matrixMulDynlinkJIT/README.md +++ b/Samples/0_Introduction/matrixMulDynlinkJIT/README.md @@ -10,7 +10,7 @@ CUDA Driver API, CUDA Dynamically Linked Library ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 
](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html) -cuParamSetv, cuMemFree, cuInit, cuModuleGetFunction, cuCtxDestroy, cuCtxCreate, cuDeviceGetName, cuCtxSynchronize, cuParamSeti, cuModuleLoadDataEx, cuDeviceGet, cuFuncSetSharedSize, cuMemAlloc, cuDeviceComputeCapability, cuFuncSetBlockShape, cuMemcpyHtoD, cuParamSetSize, cuLaunchGrid, cuDeviceGetCount, cuLaunchKernel, cuMemcpyDtoH +cuMemcpyDtoH, cuDeviceGetName, cuParamSeti, cuModuleLoadDataEx, cuModuleGetFunction, cuLaunchGrid, cuFuncSetSharedSize, cuMemFree, cuParamSetSize, cuParamSetv, cuInit, cuMemcpyHtoD, cuLaunchKernel, cuDeviceGet, cuFuncSetBlockShape, cuCtxDestroy, cuDeviceGetCount, cuDeviceComputeCapability, cuCtxSynchronize, cuMemAlloc, cuCtxCreate ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/0_Introduction/matrixMulDynlinkJIT/helper_cuda_drvapi.h b/Samples/0_Introduction/matrixMulDynlinkJIT/helper_cuda_drvapi.h index 5f69d332..4ca66fde 100644 --- a/Samples/0_Introduction/matrixMulDynlinkJIT/helper_cuda_drvapi.h +++ b/Samples/0_Introduction/matrixMulDynlinkJIT/helper_cuda_drvapi.h @@ -95,6 +95,7 @@ inline int _ConvertSMVer2CoresDRV(int major, int minor) { {0x80, 64}, {0x86, 128}, {0x87, 128}, + {0x90, 128}, {-1, -1}}; int index = 0; diff --git a/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMulDynlinkJIT_vs2017.vcxproj b/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMulDynlinkJIT_vs2017.vcxproj index da13462f..8b146bde 100644 --- a/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMulDynlinkJIT_vs2017.vcxproj +++ b/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMulDynlinkJIT_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -116,6 +116,6 @@ - + diff --git a/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMulDynlinkJIT_vs2019.vcxproj b/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMulDynlinkJIT_vs2019.vcxproj index 460bc3de..3fc6842c 100644 --- a/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMulDynlinkJIT_vs2019.vcxproj +++ b/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMulDynlinkJIT_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -112,6 +112,6 @@ - + diff --git a/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMulDynlinkJIT_vs2022.vcxproj b/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMulDynlinkJIT_vs2022.vcxproj index d5ac5358..732e0b22 100644 --- a/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMulDynlinkJIT_vs2022.vcxproj +++ b/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMulDynlinkJIT_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -112,6 +112,6 @@ - + diff --git a/Samples/0_Introduction/matrixMul_nvrtc/README.md b/Samples/0_Introduction/matrixMul_nvrtc/README.md index 2cefe20e..224c3ee0 100644 --- a/Samples/0_Introduction/matrixMul_nvrtc/README.md +++ b/Samples/0_Introduction/matrixMul_nvrtc/README.md @@ 
-10,7 +10,7 @@ CUDA Runtime API, Linear Algebra, Runtime Compilation ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64 ## CUDA APIs involved ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html) -cuModuleGetFunction, cuMemAlloc, cuLaunchKernel, cuCtxSynchronize, cuMemFree, cuMemcpyDtoH, cuMemcpyHtoD +cuMemcpyDtoH, cuLaunchKernel, cuMemcpyHtoD, cuCtxSynchronize, cuMemAlloc, cuMemFree, cuModuleGetFunction ## Dependencies needed to build/run [NVRTC](../../../README.md#nvrtc) ## Prerequisites -Download and install the [CUDA Toolkit 
11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/0_Introduction/matrixMul_nvrtc/matrixMul_nvrtc_vs2017.vcxproj b/Samples/0_Introduction/matrixMul_nvrtc/matrixMul_nvrtc_vs2017.vcxproj index 1b4a7eb0..7833bb47 100644 --- a/Samples/0_Introduction/matrixMul_nvrtc/matrixMul_nvrtc_vs2017.vcxproj +++ b/Samples/0_Introduction/matrixMul_nvrtc/matrixMul_nvrtc_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -113,6 +113,6 @@ xcopy /y /e /s "$(CudaToolkitDir)include\cooperative_groups" .\cooperative_group - + diff --git a/Samples/0_Introduction/matrixMul_nvrtc/matrixMul_nvrtc_vs2019.vcxproj b/Samples/0_Introduction/matrixMul_nvrtc/matrixMul_nvrtc_vs2019.vcxproj index cf0c66c8..d0b58366 100644 --- a/Samples/0_Introduction/matrixMul_nvrtc/matrixMul_nvrtc_vs2019.vcxproj +++ b/Samples/0_Introduction/matrixMul_nvrtc/matrixMul_nvrtc_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -109,6 +109,6 @@ xcopy /y /e /s "$(CudaToolkitDir)include\cooperative_groups" .\cooperative_group - + diff --git a/Samples/0_Introduction/matrixMul_nvrtc/matrixMul_nvrtc_vs2022.vcxproj b/Samples/0_Introduction/matrixMul_nvrtc/matrixMul_nvrtc_vs2022.vcxproj index f6dc2b6f..6fa7922e 100644 --- a/Samples/0_Introduction/matrixMul_nvrtc/matrixMul_nvrtc_vs2022.vcxproj +++ b/Samples/0_Introduction/matrixMul_nvrtc/matrixMul_nvrtc_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -109,6 +109,6 @@ xcopy /y /e /s "$(CudaToolkitDir)include\cooperative_groups" .\cooperative_group - + diff --git a/Samples/0_Introduction/mergeSort/Makefile b/Samples/0_Introduction/mergeSort/Makefile index ad45af87..815268b1 100644 --- a/Samples/0_Introduction/mergeSort/Makefile +++ b/Samples/0_Introduction/mergeSort/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq 
($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/mergeSort/NsightEclipse.xml b/Samples/0_Introduction/mergeSort/NsightEclipse.xml index 0a77b65e..55cab906 100644 --- a/Samples/0_Introduction/mergeSort/NsightEclipse.xml +++ b/Samples/0_Introduction/mergeSort/NsightEclipse.xml @@ -4,9 +4,9 @@ mergeSort cudaMalloc - cudaFree cudaDeviceSynchronize cudaMemcpy + cudaFree whole @@ -46,6 +46,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/0_Introduction/mergeSort/README.md b/Samples/0_Introduction/mergeSort/README.md index d7ab7be2..d0853896 100644 --- a/Samples/0_Introduction/mergeSort/README.md +++ b/Samples/0_Introduction/mergeSort/README.md @@ -10,7 +10,7 @@ Data-Parallel Algorithms ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) 
[SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMalloc, cudaFree, cudaDeviceSynchronize, cudaMemcpy +cudaMalloc, cudaDeviceSynchronize, cudaMemcpy, cudaFree ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/0_Introduction/mergeSort/mergeSort_vs2017.vcxproj b/Samples/0_Introduction/mergeSort/mergeSort_vs2017.vcxproj index 9f5e39b3..0ef07013 100644 --- a/Samples/0_Introduction/mergeSort/mergeSort_vs2017.vcxproj +++ b/Samples/0_Introduction/mergeSort/mergeSort_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/mergeSort.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -111,6 +111,6 @@ - + diff --git a/Samples/0_Introduction/mergeSort/mergeSort_vs2019.vcxproj b/Samples/0_Introduction/mergeSort/mergeSort_vs2019.vcxproj index 8639bd2e..5796dda5 100644 --- a/Samples/0_Introduction/mergeSort/mergeSort_vs2019.vcxproj +++ b/Samples/0_Introduction/mergeSort/mergeSort_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ 
$(OutDir)/mergeSort.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/mergeSort/mergeSort_vs2022.vcxproj b/Samples/0_Introduction/mergeSort/mergeSort_vs2022.vcxproj index c38e79d6..ed951e9f 100644 --- a/Samples/0_Introduction/mergeSort/mergeSort_vs2022.vcxproj +++ b/Samples/0_Introduction/mergeSort/mergeSort_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/mergeSort.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/simpleAWBarrier/Makefile b/Samples/0_Introduction/simpleAWBarrier/Makefile index cd8dc51d..0fa1e665 100644 --- a/Samples/0_Introduction/simpleAWBarrier/Makefile +++ b/Samples/0_Introduction/simpleAWBarrier/Makefile @@ -316,9 +316,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 70 72 75 80 86 87 +SMS ?= 70 72 75 80 86 87 90 else -SMS ?= 70 75 80 86 +SMS ?= 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/simpleAWBarrier/NsightEclipse.xml b/Samples/0_Introduction/simpleAWBarrier/NsightEclipse.xml index e8738f6a..87414f89 100644 --- a/Samples/0_Introduction/simpleAWBarrier/NsightEclipse.xml +++ 
b/Samples/0_Introduction/simpleAWBarrier/NsightEclipse.xml @@ -6,17 +6,17 @@ --std=c++11 - cudaFree - cudaMallocHost - cudaOccupancyMaxActiveBlocksPerMultiprocessor - cudaOccupancyMaxPotentialBlockSize - cudaDeviceGetAttribute - cudaFreeHost - cudaMalloc cudaStreamCreateWithFlags - cudaLaunchCooperativeKernel + cudaFree + cudaDeviceGetAttribute + cudaMallocHost + cudaFreeHost cudaStreamSynchronize + cudaLaunchCooperativeKernel + cudaMalloc + cudaOccupancyMaxActiveBlocksPerMultiprocessor cudaMemcpyAsync + cudaOccupancyMaxPotentialBlockSize whole @@ -53,6 +53,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/0_Introduction/simpleAWBarrier/README.md b/Samples/0_Introduction/simpleAWBarrier/README.md index d81ac1f8..064db83a 100644 --- a/Samples/0_Introduction/simpleAWBarrier/README.md +++ b/Samples/0_Introduction/simpleAWBarrier/README.md @@ -10,7 +10,7 @@ Arrive Wait Barrier ## Supported SM Architectures -[SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l, aarch64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaMallocHost, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaOccupancyMaxPotentialBlockSize, cudaDeviceGetAttribute, cudaFreeHost, cudaMalloc, cudaStreamCreateWithFlags, cudaLaunchCooperativeKernel, cudaStreamSynchronize, 
cudaMemcpyAsync +cudaStreamCreateWithFlags, cudaFree, cudaDeviceGetAttribute, cudaMallocHost, cudaFreeHost, cudaStreamSynchronize, cudaLaunchCooperativeKernel, cudaMalloc, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaMemcpyAsync, cudaOccupancyMaxPotentialBlockSize ## Dependencies needed to build/run [CPP11](../../../README.md#cpp11), [MBCG](../../../README.md#mbcg) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/0_Introduction/simpleAWBarrier/simpleAWBarrier_vs2017.vcxproj b/Samples/0_Introduction/simpleAWBarrier/simpleAWBarrier_vs2017.vcxproj index ea64526a..ed136540 100644 --- a/Samples/0_Introduction/simpleAWBarrier/simpleAWBarrier_vs2017.vcxproj +++ b/Samples/0_Introduction/simpleAWBarrier/simpleAWBarrier_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleAWBarrier.exe - compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/simpleAWBarrier/simpleAWBarrier_vs2019.vcxproj b/Samples/0_Introduction/simpleAWBarrier/simpleAWBarrier_vs2019.vcxproj index aaf046ed..eeddba29 100644 --- a/Samples/0_Introduction/simpleAWBarrier/simpleAWBarrier_vs2019.vcxproj +++ b/Samples/0_Introduction/simpleAWBarrier/simpleAWBarrier_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleAWBarrier.exe - compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 
./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleAWBarrier/simpleAWBarrier_vs2022.vcxproj b/Samples/0_Introduction/simpleAWBarrier/simpleAWBarrier_vs2022.vcxproj index 28637338..85eb24bf 100644 --- a/Samples/0_Introduction/simpleAWBarrier/simpleAWBarrier_vs2022.vcxproj +++ b/Samples/0_Introduction/simpleAWBarrier/simpleAWBarrier_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleAWBarrier.exe - compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleAssert/Makefile b/Samples/0_Introduction/simpleAssert/Makefile index fb73574b..bd790aa6 100644 --- a/Samples/0_Introduction/simpleAssert/Makefile +++ b/Samples/0_Introduction/simpleAssert/Makefile @@ -285,9 +285,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/simpleAssert/NsightEclipse.xml b/Samples/0_Introduction/simpleAssert/NsightEclipse.xml index 7f9e81f5..2ba03ec6 100644 --- a/Samples/0_Introduction/simpleAssert/NsightEclipse.xml +++ b/Samples/0_Introduction/simpleAssert/NsightEclipse.xml @@ -41,6 +41,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/0_Introduction/simpleAssert/README.md b/Samples/0_Introduction/simpleAssert/README.md index e5fbc1b3..05b753a1 100644 --- a/Samples/0_Introduction/simpleAssert/README.md +++ b/Samples/0_Introduction/simpleAssert/README.md @@ -10,7 +10,7 @@ Assert ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 
](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -27,7 +27,7 @@ cudaDeviceSynchronize, cudaGetErrorString ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/0_Introduction/simpleAssert/simpleAssert_vs2017.vcxproj b/Samples/0_Introduction/simpleAssert/simpleAssert_vs2017.vcxproj index a033d7b0..731833f9 100644 --- a/Samples/0_Introduction/simpleAssert/simpleAssert_vs2017.vcxproj +++ b/Samples/0_Introduction/simpleAssert/simpleAssert_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleAssert.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/simpleAssert/simpleAssert_vs2019.vcxproj b/Samples/0_Introduction/simpleAssert/simpleAssert_vs2019.vcxproj index a3233782..88e2fa09 100644 --- a/Samples/0_Introduction/simpleAssert/simpleAssert_vs2019.vcxproj +++ b/Samples/0_Introduction/simpleAssert/simpleAssert_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleAssert.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleAssert/simpleAssert_vs2022.vcxproj b/Samples/0_Introduction/simpleAssert/simpleAssert_vs2022.vcxproj index de3d8f03..ce935120 100644 --- a/Samples/0_Introduction/simpleAssert/simpleAssert_vs2022.vcxproj +++ b/Samples/0_Introduction/simpleAssert/simpleAssert_vs2022.vcxproj @@ -34,7 
+34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleAssert.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleAssert_nvrtc/README.md b/Samples/0_Introduction/simpleAssert_nvrtc/README.md index d0ecd7e0..72c5de11 100644 --- a/Samples/0_Introduction/simpleAssert_nvrtc/README.md +++ b/Samples/0_Introduction/simpleAssert_nvrtc/README.md @@ -10,7 +10,7 @@ Assert, Runtime Compilation ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) 
[SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64 ## CUDA APIs involved ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html) -cuModuleGetFunction, cuCtxSynchronize, cuLaunchKernel +cuModuleGetFunction, cuLaunchKernel, cuCtxSynchronize ## Dependencies needed to build/run [NVRTC](../../../README.md#nvrtc) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/0_Introduction/simpleAssert_nvrtc/simpleAssert_nvrtc_vs2017.vcxproj b/Samples/0_Introduction/simpleAssert_nvrtc/simpleAssert_nvrtc_vs2017.vcxproj index 11b8003f..3fc089e5 100644 --- a/Samples/0_Introduction/simpleAssert_nvrtc/simpleAssert_nvrtc_vs2017.vcxproj +++ b/Samples/0_Introduction/simpleAssert_nvrtc/simpleAssert_nvrtc_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/simpleAssert_nvrtc/simpleAssert_nvrtc_vs2019.vcxproj b/Samples/0_Introduction/simpleAssert_nvrtc/simpleAssert_nvrtc_vs2019.vcxproj index 548b3baf..0714d837 100644 --- a/Samples/0_Introduction/simpleAssert_nvrtc/simpleAssert_nvrtc_vs2019.vcxproj +++ b/Samples/0_Introduction/simpleAssert_nvrtc/simpleAssert_nvrtc_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleAssert_nvrtc/simpleAssert_nvrtc_vs2022.vcxproj b/Samples/0_Introduction/simpleAssert_nvrtc/simpleAssert_nvrtc_vs2022.vcxproj index 59d571ce..fc010fb0 100644 --- 
a/Samples/0_Introduction/simpleAssert_nvrtc/simpleAssert_nvrtc_vs2022.vcxproj +++ b/Samples/0_Introduction/simpleAssert_nvrtc/simpleAssert_nvrtc_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics/Makefile b/Samples/0_Introduction/simpleAtomicIntrinsics/Makefile index 7b221fb2..b7222445 100644 --- a/Samples/0_Introduction/simpleAtomicIntrinsics/Makefile +++ b/Samples/0_Introduction/simpleAtomicIntrinsics/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics/NsightEclipse.xml b/Samples/0_Introduction/simpleAtomicIntrinsics/NsightEclipse.xml index 093f108b..e9252d1c 100644 --- a/Samples/0_Introduction/simpleAtomicIntrinsics/NsightEclipse.xml +++ b/Samples/0_Introduction/simpleAtomicIntrinsics/NsightEclipse.xml @@ -3,12 +3,12 @@ simpleAtomicIntrinsics + cudaStreamCreateWithFlags cudaFree cudaMallocHost cudaFreeHost - cudaMalloc - cudaStreamCreateWithFlags cudaStreamSynchronize + cudaMalloc cudaMemcpyAsync @@ -48,6 +48,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics/README.md b/Samples/0_Introduction/simpleAtomicIntrinsics/README.md index 81693b71..0fa52781 100644 --- a/Samples/0_Introduction/simpleAtomicIntrinsics/README.md +++ b/Samples/0_Introduction/simpleAtomicIntrinsics/README.md @@ -10,7 +10,7 @@ Atomic Intrinsics ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 
](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l, aarch64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaMallocHost, cudaFreeHost, cudaMalloc, cudaStreamCreateWithFlags, cudaStreamSynchronize, cudaMemcpyAsync +cudaStreamCreateWithFlags, cudaFree, cudaMallocHost, cudaFreeHost, cudaStreamSynchronize, cudaMalloc, cudaMemcpyAsync ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_vs2017.vcxproj b/Samples/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_vs2017.vcxproj index 87308a5d..d122ae68 100644 --- a/Samples/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_vs2017.vcxproj +++ b/Samples/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleAtomicIntrinsics.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -108,6 +108,6 @@ - + diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_vs2019.vcxproj b/Samples/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_vs2019.vcxproj index b99f4190..7f05dcc6 100644 --- a/Samples/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_vs2019.vcxproj +++ b/Samples/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleAtomicIntrinsics.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_vs2022.vcxproj 
b/Samples/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_vs2022.vcxproj index bb0bb8df..7dd8d89e 100644 --- a/Samples/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_vs2022.vcxproj +++ b/Samples/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleAtomicIntrinsics.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/README.md b/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/README.md index 0d1700a7..a53e822b 100644 --- a/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/README.md +++ b/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/README.md @@ -10,7 +10,7 @@ Atomic Intrinsics, Runtime Compilation ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 
](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,7 +23,7 @@ x86_64, ppc64le, aarch64 ## CUDA APIs involved ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html) -cuModuleGetFunction, cuMemAlloc, cuLaunchKernel, cuCtxSynchronize, cuMemFree, cuMemcpyDtoH, cuMemcpyHtoD +cuMemcpyDtoH, cuLaunchKernel, cuMemcpyHtoD, cuCtxSynchronize, cuMemAlloc, cuMemFree, cuModuleGetFunction ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) cudaBlockSize, cudaGridSize @@ -33,7 +33,7 @@ cudaBlockSize, cudaGridSize ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_nvrtc_vs2017.vcxproj b/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_nvrtc_vs2017.vcxproj index c51f9939..9db171b1 100644 --- a/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_nvrtc_vs2017.vcxproj +++ b/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_nvrtc_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -108,6 +108,6 @@ - + diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_nvrtc_vs2019.vcxproj b/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_nvrtc_vs2019.vcxproj index 75b918e7..b43cec91 100644 --- a/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_nvrtc_vs2019.vcxproj +++ b/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_nvrtc_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -104,6 +104,6 @@ - + diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_nvrtc_vs2022.vcxproj b/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_nvrtc_vs2022.vcxproj index f7cb9e38..bd705f44 100644 --- a/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_nvrtc_vs2022.vcxproj +++ b/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_nvrtc_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -104,6 +104,6 @@ - + diff --git a/Samples/0_Introduction/simpleAttributes/Makefile b/Samples/0_Introduction/simpleAttributes/Makefile index 00e9c4f0..e685dd69 100644 --- a/Samples/0_Introduction/simpleAttributes/Makefile +++ b/Samples/0_Introduction/simpleAttributes/Makefile @@ -285,9 +285,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 
75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/simpleAttributes/NsightEclipse.xml b/Samples/0_Introduction/simpleAttributes/NsightEclipse.xml index 5141efa1..fcad8235 100644 --- a/Samples/0_Introduction/simpleAttributes/NsightEclipse.xml +++ b/Samples/0_Introduction/simpleAttributes/NsightEclipse.xml @@ -3,15 +3,15 @@ simpleAttributes - cudaDeviceSetLimit cudaFree cudaMallocHost - cudaStreamCreate cudaFreeHost - cudaMalloc - cudaStreamSetAttribute - cudaMemcpyAsync cudaStreamSynchronize + cudaStreamSetAttribute + cudaDeviceSetLimit + cudaMalloc + cudaMemcpyAsync + cudaStreamCreate cudaGetDeviceProperties @@ -49,6 +49,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/0_Introduction/simpleAttributes/README.md b/Samples/0_Introduction/simpleAttributes/README.md index e5b6fa87..5dc1787b 100644 --- a/Samples/0_Introduction/simpleAttributes/README.md +++ b/Samples/0_Introduction/simpleAttributes/README.md @@ -10,7 +10,7 @@ Attributes usage on stream ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 
](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaDeviceSetLimit, cudaFree, cudaMallocHost, cudaStreamCreate, cudaFreeHost, cudaMalloc, cudaStreamSetAttribute, cudaMemcpyAsync, cudaStreamSynchronize, cudaGetDeviceProperties +cudaFree, cudaMallocHost, cudaFreeHost, cudaStreamSynchronize, cudaStreamSetAttribute, cudaDeviceSetLimit, cudaMalloc, cudaMemcpyAsync, cudaStreamCreate, cudaGetDeviceProperties ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/0_Introduction/simpleAttributes/simpleAttributes_vs2017.vcxproj b/Samples/0_Introduction/simpleAttributes/simpleAttributes_vs2017.vcxproj index 2752b266..a446d3a9 100644 --- a/Samples/0_Introduction/simpleAttributes/simpleAttributes_vs2017.vcxproj +++ b/Samples/0_Introduction/simpleAttributes/simpleAttributes_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleAttributes.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/simpleAttributes/simpleAttributes_vs2019.vcxproj b/Samples/0_Introduction/simpleAttributes/simpleAttributes_vs2019.vcxproj index 620fdca3..e49167d9 100644 --- a/Samples/0_Introduction/simpleAttributes/simpleAttributes_vs2019.vcxproj +++ b/Samples/0_Introduction/simpleAttributes/simpleAttributes_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleAttributes.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleAttributes/simpleAttributes_vs2022.vcxproj b/Samples/0_Introduction/simpleAttributes/simpleAttributes_vs2022.vcxproj index 9ab8f931..1eb61252 100644 --- 
a/Samples/0_Introduction/simpleAttributes/simpleAttributes_vs2022.vcxproj +++ b/Samples/0_Introduction/simpleAttributes/simpleAttributes_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleAttributes.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleCUDA2GL/Makefile b/Samples/0_Introduction/simpleCUDA2GL/Makefile index 39042604..80e3250f 100644 --- a/Samples/0_Introduction/simpleCUDA2GL/Makefile +++ b/Samples/0_Introduction/simpleCUDA2GL/Makefile @@ -311,9 +311,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/simpleCUDA2GL/NsightEclipse.xml b/Samples/0_Introduction/simpleCUDA2GL/NsightEclipse.xml index 55b9400c..1f40f86b 100644 --- a/Samples/0_Introduction/simpleCUDA2GL/NsightEclipse.xml +++ b/Samples/0_Introduction/simpleCUDA2GL/NsightEclipse.xml @@ -3,19 +3,19 @@ simpleCUDA2GL - cudaFree - cudaGraphicsMapResources - cudaGraphicsUnregisterResource - cudaMemcpyToArray - cudaGraphicsGLRegisterBuffer cudaHostAlloc - cudaGraphicsResourceGetMappedPointer - cudaProcess - cudaDeviceSynchronize - cudaMalloc - cudaGraphicsSubResourceGetMappedArray - cudaGraphicsGLRegisterImage cudaGraphicsUnmapResources + cudaMalloc + cudaFree + cudaGraphicsResourceGetMappedPointer + cudaGraphicsMapResources + cudaMemcpyToArray + cudaDeviceSynchronize + cudaProcess + 
cudaGraphicsUnregisterResource + cudaGraphicsSubResourceGetMappedArray + cudaGraphicsGLRegisterBuffer + cudaGraphicsGLRegisterImage whole @@ -79,6 +79,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/0_Introduction/simpleCUDA2GL/README.md b/Samples/0_Introduction/simpleCUDA2GL/README.md index c5f7bb6d..7c46fb7e 100644 --- a/Samples/0_Introduction/simpleCUDA2GL/README.md +++ b/Samples/0_Introduction/simpleCUDA2GL/README.md @@ -10,7 +10,7 @@ Graphics Interop, Image Processing, Performance Strategies ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64 ## CUDA APIs involved ### [CUDA Runtime 
API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaGraphicsMapResources, cudaGraphicsUnregisterResource, cudaMemcpyToArray, cudaGraphicsGLRegisterBuffer, cudaHostAlloc, cudaGraphicsResourceGetMappedPointer, cudaProcess, cudaDeviceSynchronize, cudaMalloc, cudaGraphicsSubResourceGetMappedArray, cudaGraphicsGLRegisterImage, cudaGraphicsUnmapResources +cudaHostAlloc, cudaGraphicsUnmapResources, cudaMalloc, cudaFree, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaMemcpyToArray, cudaDeviceSynchronize, cudaProcess, cudaGraphicsUnregisterResource, cudaGraphicsSubResourceGetMappedArray, cudaGraphicsGLRegisterBuffer, cudaGraphicsGLRegisterImage ## Dependencies needed to build/run [X11](../../../README.md#x11), [GL](../../../README.md#gl) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/0_Introduction/simpleCUDA2GL/findgllib.mk b/Samples/0_Introduction/simpleCUDA2GL/findgllib.mk index f0a5c551..998fcf0f 100644 --- a/Samples/0_Introduction/simpleCUDA2GL/findgllib.mk +++ b/Samples/0_Introduction/simpleCUDA2GL/findgllib.mk @@ -53,11 +53,12 @@ endif ifeq ("$(TARGET_OS)","linux") # $(info) >> findgllib.mk -> LINUX path <<<) # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside - UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) - FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) - RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) - CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) 
+ UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) + FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) + RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) + CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux") DFLT_PATH ?= /usr/lib endif endif + ifeq ("$(SUSE)","0") GLPATH ?= /usr/X11R6/lib64 GLLINK ?= -L/usr/X11R6/lib64 DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(FEDORA)","0") + else GLPATH ?= /usr/lib64/nvidia GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif - ifeq ("$(RHEL)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(CENTOS)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - + # find libGL, libGLU GLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so -print 2>/dev/null) GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null) diff --git a/Samples/0_Introduction/simpleCUDA2GL/simpleCUDA2GL_vs2017.vcxproj b/Samples/0_Introduction/simpleCUDA2GL/simpleCUDA2GL_vs2017.vcxproj index b1c84ae4..65865117 100644 --- a/Samples/0_Introduction/simpleCUDA2GL/simpleCUDA2GL_vs2017.vcxproj +++ b/Samples/0_Introduction/simpleCUDA2GL/simpleCUDA2GL_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleCUDA2GL.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -118,6 +118,6 @@ - + diff --git a/Samples/0_Introduction/simpleCUDA2GL/simpleCUDA2GL_vs2019.vcxproj b/Samples/0_Introduction/simpleCUDA2GL/simpleCUDA2GL_vs2019.vcxproj index 955b060a..19ad1de2 100644 --- a/Samples/0_Introduction/simpleCUDA2GL/simpleCUDA2GL_vs2019.vcxproj +++ b/Samples/0_Introduction/simpleCUDA2GL/simpleCUDA2GL_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleCUDA2GL.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -114,6 +114,6 @@ - + diff --git a/Samples/0_Introduction/simpleCUDA2GL/simpleCUDA2GL_vs2022.vcxproj b/Samples/0_Introduction/simpleCUDA2GL/simpleCUDA2GL_vs2022.vcxproj index 46e97f62..f2b071ec 100644 --- a/Samples/0_Introduction/simpleCUDA2GL/simpleCUDA2GL_vs2022.vcxproj +++ b/Samples/0_Introduction/simpleCUDA2GL/simpleCUDA2GL_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleCUDA2GL.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -114,6 +114,6 @@ - + diff --git 
a/Samples/0_Introduction/simpleCallback/Makefile b/Samples/0_Introduction/simpleCallback/Makefile index fade686a..ff334a83 100644 --- a/Samples/0_Introduction/simpleCallback/Makefile +++ b/Samples/0_Introduction/simpleCallback/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/simpleCallback/NsightEclipse.xml b/Samples/0_Introduction/simpleCallback/NsightEclipse.xml index 3ee3b90d..931c7c65 100644 --- a/Samples/0_Introduction/simpleCallback/NsightEclipse.xml +++ b/Samples/0_Introduction/simpleCallback/NsightEclipse.xml @@ -3,16 +3,16 @@ simpleCallback + cudaHostAlloc cudaStreamDestroy cudaFree - cudaStreamCreate - cudaHostAlloc + cudaSetDevice cudaGetDeviceCount cudaFreeHost - cudaMalloc - cudaSetDevice - cudaMemcpyAsync cudaStreamAddCallback + cudaMalloc + cudaMemcpyAsync + cudaStreamCreate cudaGetDeviceProperties @@ -53,6 +53,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/0_Introduction/simpleCallback/README.md b/Samples/0_Introduction/simpleCallback/README.md index 24c76c25..40368567 100644 --- a/Samples/0_Introduction/simpleCallback/README.md +++ b/Samples/0_Introduction/simpleCallback/README.md @@ -10,7 +10,7 @@ CUDA Streams, Callback Functions, Multithreading ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 
](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaStreamDestroy, cudaFree, cudaStreamCreate, cudaHostAlloc, cudaGetDeviceCount, cudaFreeHost, cudaMalloc, cudaSetDevice, cudaMemcpyAsync, cudaStreamAddCallback, cudaGetDeviceProperties +cudaHostAlloc, cudaStreamDestroy, cudaFree, cudaSetDevice, cudaGetDeviceCount, cudaFreeHost, cudaStreamAddCallback, cudaMalloc, cudaMemcpyAsync, cudaStreamCreate, cudaGetDeviceProperties ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/0_Introduction/simpleCallback/simpleCallback_vs2017.vcxproj b/Samples/0_Introduction/simpleCallback/simpleCallback_vs2017.vcxproj index 677ea6b2..ad8bf900 100644 --- a/Samples/0_Introduction/simpleCallback/simpleCallback_vs2017.vcxproj +++ b/Samples/0_Introduction/simpleCallback/simpleCallback_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleCallback.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -108,6 +108,6 @@ - + diff --git a/Samples/0_Introduction/simpleCallback/simpleCallback_vs2019.vcxproj b/Samples/0_Introduction/simpleCallback/simpleCallback_vs2019.vcxproj index 0dcbadea..b200ba6f 100644 --- a/Samples/0_Introduction/simpleCallback/simpleCallback_vs2019.vcxproj +++ b/Samples/0_Introduction/simpleCallback/simpleCallback_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleCallback.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/0_Introduction/simpleCallback/simpleCallback_vs2022.vcxproj b/Samples/0_Introduction/simpleCallback/simpleCallback_vs2022.vcxproj index 6f2e491b..196f5794 100644 --- a/Samples/0_Introduction/simpleCallback/simpleCallback_vs2022.vcxproj +++ 
b/Samples/0_Introduction/simpleCallback/simpleCallback_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleCallback.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/0_Introduction/simpleCooperativeGroups/Makefile b/Samples/0_Introduction/simpleCooperativeGroups/Makefile index c45b7332..22efbff3 100644 --- a/Samples/0_Introduction/simpleCooperativeGroups/Makefile +++ b/Samples/0_Introduction/simpleCooperativeGroups/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/simpleCooperativeGroups/NsightEclipse.xml b/Samples/0_Introduction/simpleCooperativeGroups/NsightEclipse.xml index 54d9c4d3..939f68ad 100644 --- a/Samples/0_Introduction/simpleCooperativeGroups/NsightEclipse.xml +++ b/Samples/0_Introduction/simpleCooperativeGroups/NsightEclipse.xml @@ -44,6 +44,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/0_Introduction/simpleCooperativeGroups/README.md b/Samples/0_Introduction/simpleCooperativeGroups/README.md index 7e80f6bc..ab3e11cc 100644 --- a/Samples/0_Introduction/simpleCooperativeGroups/README.md +++ b/Samples/0_Introduction/simpleCooperativeGroups/README.md @@ -10,7 +10,7 @@ Cooperative Groups ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 
](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -27,7 +27,7 @@ cudaDeviceSynchronize, cudaGetErrorString ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/0_Introduction/simpleCooperativeGroups/simpleCooperativeGroups_vs2017.vcxproj b/Samples/0_Introduction/simpleCooperativeGroups/simpleCooperativeGroups_vs2017.vcxproj index 4a7bac2a..061538d8 100644 --- a/Samples/0_Introduction/simpleCooperativeGroups/simpleCooperativeGroups_vs2017.vcxproj +++ b/Samples/0_Introduction/simpleCooperativeGroups/simpleCooperativeGroups_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleCooperativeGroups.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/simpleCooperativeGroups/simpleCooperativeGroups_vs2019.vcxproj b/Samples/0_Introduction/simpleCooperativeGroups/simpleCooperativeGroups_vs2019.vcxproj index 09d33159..bf17882a 100644 --- a/Samples/0_Introduction/simpleCooperativeGroups/simpleCooperativeGroups_vs2019.vcxproj +++ b/Samples/0_Introduction/simpleCooperativeGroups/simpleCooperativeGroups_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleCooperativeGroups.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleCooperativeGroups/simpleCooperativeGroups_vs2022.vcxproj 
b/Samples/0_Introduction/simpleCooperativeGroups/simpleCooperativeGroups_vs2022.vcxproj index 12759203..649221c2 100644 --- a/Samples/0_Introduction/simpleCooperativeGroups/simpleCooperativeGroups_vs2022.vcxproj +++ b/Samples/0_Introduction/simpleCooperativeGroups/simpleCooperativeGroups_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleCooperativeGroups.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleCubemapTexture/Makefile b/Samples/0_Introduction/simpleCubemapTexture/Makefile index fdff3980..4c1fed17 100644 --- a/Samples/0_Introduction/simpleCubemapTexture/Makefile +++ b/Samples/0_Introduction/simpleCubemapTexture/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/simpleCubemapTexture/NsightEclipse.xml b/Samples/0_Introduction/simpleCubemapTexture/NsightEclipse.xml index 625ed7a5..1bf6b010 100644 --- a/Samples/0_Introduction/simpleCubemapTexture/NsightEclipse.xml +++ b/Samples/0_Introduction/simpleCubemapTexture/NsightEclipse.xml @@ -3,17 +3,17 @@ simpleCubemapTexture - cudaFree + cudaMemcpy + cudaCreateChannelDesc cudaFreeArray + cudaFree + cudaPitchedPtr + cudaPos + cudaDestroyTextureObject cudaExtent cudaDeviceSynchronize - cudaDestroyTextureObject - cudaPitchedPtr cudaCreateTextureObject cudaMalloc - cudaCreateChannelDesc 
- cudaPos - cudaMemcpy cudaGetDeviceProperties @@ -52,6 +52,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/0_Introduction/simpleCubemapTexture/README.md b/Samples/0_Introduction/simpleCubemapTexture/README.md index 68f06947..44c3896f 100644 --- a/Samples/0_Introduction/simpleCubemapTexture/README.md +++ b/Samples/0_Introduction/simpleCubemapTexture/README.md @@ -10,7 +10,7 @@ Texture, Volume Processing ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaFreeArray, cudaExtent, 
cudaDeviceSynchronize, cudaDestroyTextureObject, cudaPitchedPtr, cudaCreateTextureObject, cudaMalloc, cudaCreateChannelDesc, cudaPos, cudaMemcpy, cudaGetDeviceProperties +cudaMemcpy, cudaCreateChannelDesc, cudaFreeArray, cudaFree, cudaPitchedPtr, cudaPos, cudaDestroyTextureObject, cudaExtent, cudaDeviceSynchronize, cudaCreateTextureObject, cudaMalloc, cudaGetDeviceProperties ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/0_Introduction/simpleCubemapTexture/simpleCubemapTexture_vs2017.vcxproj b/Samples/0_Introduction/simpleCubemapTexture/simpleCubemapTexture_vs2017.vcxproj index 26ae9423..307c5282 100644 --- a/Samples/0_Introduction/simpleCubemapTexture/simpleCubemapTexture_vs2017.vcxproj +++ b/Samples/0_Introduction/simpleCubemapTexture/simpleCubemapTexture_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleCubemapTexture.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/simpleCubemapTexture/simpleCubemapTexture_vs2019.vcxproj b/Samples/0_Introduction/simpleCubemapTexture/simpleCubemapTexture_vs2019.vcxproj index bcdaec2f..709f0362 100644 --- a/Samples/0_Introduction/simpleCubemapTexture/simpleCubemapTexture_vs2019.vcxproj +++ b/Samples/0_Introduction/simpleCubemapTexture/simpleCubemapTexture_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ 
$(OutDir)/simpleCubemapTexture.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleCubemapTexture/simpleCubemapTexture_vs2022.vcxproj b/Samples/0_Introduction/simpleCubemapTexture/simpleCubemapTexture_vs2022.vcxproj index 0dd40bfd..3c332afb 100644 --- a/Samples/0_Introduction/simpleCubemapTexture/simpleCubemapTexture_vs2022.vcxproj +++ b/Samples/0_Introduction/simpleCubemapTexture/simpleCubemapTexture_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleCubemapTexture.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleDrvRuntime/Makefile b/Samples/0_Introduction/simpleDrvRuntime/Makefile index 3cbc5811..46593a89 100644 --- a/Samples/0_Introduction/simpleDrvRuntime/Makefile +++ b/Samples/0_Introduction/simpleDrvRuntime/Makefile @@ -283,9 +283,9 @@ FATBIN_FILE := vectorAdd_kernel${TARGET_SIZE}.fatbin # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git 
a/Samples/0_Introduction/simpleDrvRuntime/README.md b/Samples/0_Introduction/simpleDrvRuntime/README.md index 74ea4ad9..158157e8 100644 --- a/Samples/0_Introduction/simpleDrvRuntime/README.md +++ b/Samples/0_Introduction/simpleDrvRuntime/README.md @@ -10,7 +10,7 @@ CUDA Driver API, CUDA Runtime API, Vector Addition ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html) -cuCtxDestroy, cuModuleLoadData, cuCtxCreate, cuLaunchKernel, cuModuleUnload, cuInit, cuModuleGetFunction +cuLaunchKernel, 
cuModuleLoadData, cuCtxDestroy, cuModuleUnload, cuModuleGetFunction, cuCtxCreate, cuInit ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaMallocHost, cudaFreeHost, cudaMalloc, cudaStreamCreateWithFlags, cudaStreamSynchronize, cudaMemcpyAsync +cudaStreamCreateWithFlags, cudaFree, cudaMallocHost, cudaFreeHost, cudaStreamSynchronize, cudaMalloc, cudaMemcpyAsync ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/0_Introduction/simpleDrvRuntime/simpleDrvRuntime_vs2017.vcxproj b/Samples/0_Introduction/simpleDrvRuntime/simpleDrvRuntime_vs2017.vcxproj index 92e427cd..019fc0cc 100644 --- a/Samples/0_Introduction/simpleDrvRuntime/simpleDrvRuntime_vs2017.vcxproj +++ b/Samples/0_Introduction/simpleDrvRuntime/simpleDrvRuntime_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleDrvRuntime.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -111,6 +111,6 @@ - + diff --git a/Samples/0_Introduction/simpleDrvRuntime/simpleDrvRuntime_vs2019.vcxproj b/Samples/0_Introduction/simpleDrvRuntime/simpleDrvRuntime_vs2019.vcxproj index 64f8fab9..727c658d 100644 --- a/Samples/0_Introduction/simpleDrvRuntime/simpleDrvRuntime_vs2019.vcxproj +++ b/Samples/0_Introduction/simpleDrvRuntime/simpleDrvRuntime_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleDrvRuntime.exe - 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/simpleDrvRuntime/simpleDrvRuntime_vs2022.vcxproj b/Samples/0_Introduction/simpleDrvRuntime/simpleDrvRuntime_vs2022.vcxproj index 7f3d2b01..93b2ffad 100644 --- a/Samples/0_Introduction/simpleDrvRuntime/simpleDrvRuntime_vs2022.vcxproj +++ b/Samples/0_Introduction/simpleDrvRuntime/simpleDrvRuntime_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleDrvRuntime.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/simpleHyperQ/Makefile b/Samples/0_Introduction/simpleHyperQ/Makefile index 48018511..16140688 100644 --- a/Samples/0_Introduction/simpleHyperQ/Makefile +++ b/Samples/0_Introduction/simpleHyperQ/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/simpleHyperQ/NsightEclipse.xml b/Samples/0_Introduction/simpleHyperQ/NsightEclipse.xml index 8777270a..1a503845 100644 --- 
a/Samples/0_Introduction/simpleHyperQ/NsightEclipse.xml +++ b/Samples/0_Introduction/simpleHyperQ/NsightEclipse.xml @@ -3,20 +3,20 @@ simpleHyperQ - cudaStreamDestroy - cudaFree - cudaEventRecord - cudaMallocHost - cudaStreamCreate - cudaEventCreate - cudaEventElapsedTime - cudaEventSynchronize - cudaFreeHost - cudaMalloc - cudaEventDestroy cudaMemcpy - cudaGetDeviceProperties + cudaStreamDestroy + cudaMalloc + cudaFree + cudaMallocHost + cudaEventSynchronize + cudaEventRecord + cudaFreeHost cudaGetDevice + cudaEventDestroy + cudaEventElapsedTime + cudaStreamCreate + cudaGetDeviceProperties + cudaEventCreate whole @@ -62,6 +62,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/0_Introduction/simpleHyperQ/README.md b/Samples/0_Introduction/simpleHyperQ/README.md index 8527317e..467bc4b3 100644 --- a/Samples/0_Introduction/simpleHyperQ/README.md +++ b/Samples/0_Introduction/simpleHyperQ/README.md @@ -10,7 +10,7 @@ CUDA Systems Integration, Performance Strategies ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 
](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaStreamDestroy, cudaFree, cudaEventRecord, cudaMallocHost, cudaStreamCreate, cudaEventCreate, cudaEventElapsedTime, cudaEventSynchronize, cudaFreeHost, cudaMalloc, cudaEventDestroy, cudaMemcpy, cudaGetDeviceProperties, cudaGetDevice +cudaMemcpy, cudaStreamDestroy, cudaMalloc, cudaFree, cudaMallocHost, cudaEventSynchronize, cudaEventRecord, cudaFreeHost, cudaGetDevice, cudaEventDestroy, cudaEventElapsedTime, cudaStreamCreate, cudaGetDeviceProperties, cudaEventCreate ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/0_Introduction/simpleHyperQ/simpleHyperQ_vs2017.vcxproj b/Samples/0_Introduction/simpleHyperQ/simpleHyperQ_vs2017.vcxproj index 1e4bedc3..d2bbd16a 100644 --- a/Samples/0_Introduction/simpleHyperQ/simpleHyperQ_vs2017.vcxproj +++ b/Samples/0_Introduction/simpleHyperQ/simpleHyperQ_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleHyperQ.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/simpleHyperQ/simpleHyperQ_vs2019.vcxproj b/Samples/0_Introduction/simpleHyperQ/simpleHyperQ_vs2019.vcxproj index b9180fa0..3a6cc72c 100644 --- a/Samples/0_Introduction/simpleHyperQ/simpleHyperQ_vs2019.vcxproj +++ b/Samples/0_Introduction/simpleHyperQ/simpleHyperQ_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleHyperQ.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleHyperQ/simpleHyperQ_vs2022.vcxproj b/Samples/0_Introduction/simpleHyperQ/simpleHyperQ_vs2022.vcxproj index b641b971..c15d7eec 100644 --- a/Samples/0_Introduction/simpleHyperQ/simpleHyperQ_vs2022.vcxproj +++ b/Samples/0_Introduction/simpleHyperQ/simpleHyperQ_vs2022.vcxproj @@ -34,7 
+34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleHyperQ.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleIPC/Makefile b/Samples/0_Introduction/simpleIPC/Makefile index 31945c1e..914f1ab6 100644 --- a/Samples/0_Introduction/simpleIPC/Makefile +++ b/Samples/0_Introduction/simpleIPC/Makefile @@ -303,9 +303,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/simpleIPC/NsightEclipse.xml b/Samples/0_Introduction/simpleIPC/NsightEclipse.xml index 23e3c5a7..d25608df 100644 --- a/Samples/0_Introduction/simpleIPC/NsightEclipse.xml +++ b/Samples/0_Introduction/simpleIPC/NsightEclipse.xml @@ -3,29 +3,29 @@ simpleIPC - cudaDeviceEnablePeerAccess - cudaIpcOpenEventHandle - cudaOccupancyMaxActiveBlocksPerMultiprocessor - cudaStreamCreateWithFlags - cudaDeviceCanAccessPeer - cudaMemcpyAsync - cudaStreamDestroy - cudaEventCreate - cudaMalloc - cudaEventDestroy cudaSetDevice - cudaIpcOpenMemHandle - cudaGetDeviceProperties - cudaGetDeviceCount - cudaIpcGetEventHandle - cudaGetLastError - cudaStreamSynchronize - cudaStreamWaitEvent - cudaFree cudaIpcCloseMemHandle + cudaEventDestroy + cudaGetDeviceCount + cudaMemcpyAsync + cudaDeviceCanAccessPeer + cudaStreamCreateWithFlags + cudaStreamDestroy + cudaGetLastError + cudaIpcOpenEventHandle + cudaIpcOpenMemHandle + cudaIpcGetEventHandle + 
cudaStreamWaitEvent + cudaEventCreate + cudaFree + cudaEventSynchronize cudaEventRecord cudaIpcGetMemHandle - cudaEventSynchronize + cudaStreamSynchronize + cudaDeviceEnablePeerAccess + cudaMalloc + cudaOccupancyMaxActiveBlocksPerMultiprocessor + cudaGetDeviceProperties whole @@ -71,6 +71,7 @@ sm80 sm86 sm87 + sm90 ../../../Common/helper_multiprocess.cpp ../../../Common/helper_multiprocess.h diff --git a/Samples/0_Introduction/simpleIPC/README.md b/Samples/0_Introduction/simpleIPC/README.md index 1594c529..a9d3336b 100644 --- a/Samples/0_Introduction/simpleIPC/README.md +++ b/Samples/0_Introduction/simpleIPC/README.md @@ -10,7 +10,7 @@ CUDA Systems Integration, Peer to Peer, InterProcess Communication ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 
8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaDeviceEnablePeerAccess, cudaIpcOpenEventHandle, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaStreamCreateWithFlags, cudaDeviceCanAccessPeer, cudaMemcpyAsync, cudaStreamDestroy, cudaEventCreate, cudaMalloc, cudaEventDestroy, cudaSetDevice, cudaIpcOpenMemHandle, cudaGetDeviceProperties, cudaGetDeviceCount, cudaIpcGetEventHandle, cudaGetLastError, cudaStreamSynchronize, cudaStreamWaitEvent, cudaFree, cudaIpcCloseMemHandle, cudaEventRecord, cudaIpcGetMemHandle, cudaEventSynchronize +cudaSetDevice, cudaIpcCloseMemHandle, cudaEventDestroy, cudaGetDeviceCount, cudaMemcpyAsync, cudaDeviceCanAccessPeer, cudaStreamCreateWithFlags, cudaStreamDestroy, cudaGetLastError, cudaIpcOpenEventHandle, cudaIpcOpenMemHandle, cudaIpcGetEventHandle, cudaStreamWaitEvent, cudaEventCreate, cudaFree, cudaEventSynchronize, cudaEventRecord, cudaIpcGetMemHandle, cudaStreamSynchronize, cudaDeviceEnablePeerAccess, cudaMalloc, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaGetDeviceProperties ## Dependencies needed to build/run [IPC](../../../README.md#ipc) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/0_Introduction/simpleIPC/simpleIPC_vs2017.vcxproj b/Samples/0_Introduction/simpleIPC/simpleIPC_vs2017.vcxproj index 99d342ac..03771430 100644 --- a/Samples/0_Introduction/simpleIPC/simpleIPC_vs2017.vcxproj +++ b/Samples/0_Introduction/simpleIPC/simpleIPC_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleIPC.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -108,6 +108,6 @@ - + diff --git a/Samples/0_Introduction/simpleIPC/simpleIPC_vs2019.vcxproj b/Samples/0_Introduction/simpleIPC/simpleIPC_vs2019.vcxproj index 1e507919..4d8096a2 100644 --- a/Samples/0_Introduction/simpleIPC/simpleIPC_vs2019.vcxproj +++ b/Samples/0_Introduction/simpleIPC/simpleIPC_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleIPC.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/0_Introduction/simpleIPC/simpleIPC_vs2022.vcxproj b/Samples/0_Introduction/simpleIPC/simpleIPC_vs2022.vcxproj index d9e9f48d..df3aba1f 100644 --- a/Samples/0_Introduction/simpleIPC/simpleIPC_vs2022.vcxproj +++ b/Samples/0_Introduction/simpleIPC/simpleIPC_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleIPC.exe - 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/0_Introduction/simpleLayeredTexture/Makefile b/Samples/0_Introduction/simpleLayeredTexture/Makefile index bd2660f6..eeb6d7ea 100644 --- a/Samples/0_Introduction/simpleLayeredTexture/Makefile +++ b/Samples/0_Introduction/simpleLayeredTexture/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/simpleLayeredTexture/NsightEclipse.xml b/Samples/0_Introduction/simpleLayeredTexture/NsightEclipse.xml index 91481c99..ff2bc6f0 100644 --- a/Samples/0_Introduction/simpleLayeredTexture/NsightEclipse.xml +++ b/Samples/0_Introduction/simpleLayeredTexture/NsightEclipse.xml @@ -3,17 +3,17 @@ simpleLayeredTexture - cudaFree + cudaMemcpy + cudaCreateChannelDesc cudaFreeArray + cudaFree + cudaPitchedPtr + cudaPos + cudaDestroyTextureObject cudaExtent cudaDeviceSynchronize - cudaDestroyTextureObject - cudaPitchedPtr cudaCreateTextureObject cudaMalloc - cudaCreateChannelDesc - cudaPos - cudaMemcpy cudaGetDeviceProperties @@ -52,6 +52,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/0_Introduction/simpleLayeredTexture/README.md b/Samples/0_Introduction/simpleLayeredTexture/README.md index 3add6778..5dc0eb71 100644 --- a/Samples/0_Introduction/simpleLayeredTexture/README.md +++ b/Samples/0_Introduction/simpleLayeredTexture/README.md @@ -10,7 +10,7 
@@ Texture, Volume Processing ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaFreeArray, cudaExtent, cudaDeviceSynchronize, cudaDestroyTextureObject, cudaPitchedPtr, cudaCreateTextureObject, cudaMalloc, cudaCreateChannelDesc, cudaPos, cudaMemcpy, cudaGetDeviceProperties +cudaMemcpy, cudaCreateChannelDesc, cudaFreeArray, cudaFree, cudaPitchedPtr, cudaPos, cudaDestroyTextureObject, cudaExtent, cudaDeviceSynchronize, cudaCreateTextureObject, cudaMalloc, cudaGetDeviceProperties ## 
Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/0_Introduction/simpleLayeredTexture/simpleLayeredTexture_vs2017.vcxproj b/Samples/0_Introduction/simpleLayeredTexture/simpleLayeredTexture_vs2017.vcxproj index 71e3a84c..ee1e3e42 100644 --- a/Samples/0_Introduction/simpleLayeredTexture/simpleLayeredTexture_vs2017.vcxproj +++ b/Samples/0_Introduction/simpleLayeredTexture/simpleLayeredTexture_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleLayeredTexture.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/simpleLayeredTexture/simpleLayeredTexture_vs2019.vcxproj b/Samples/0_Introduction/simpleLayeredTexture/simpleLayeredTexture_vs2019.vcxproj index fe147d3e..3ae1a4f9 100644 --- a/Samples/0_Introduction/simpleLayeredTexture/simpleLayeredTexture_vs2019.vcxproj +++ b/Samples/0_Introduction/simpleLayeredTexture/simpleLayeredTexture_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleLayeredTexture.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; 
-Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleLayeredTexture/simpleLayeredTexture_vs2022.vcxproj b/Samples/0_Introduction/simpleLayeredTexture/simpleLayeredTexture_vs2022.vcxproj index a99c2ee3..0d29aae5 100644 --- a/Samples/0_Introduction/simpleLayeredTexture/simpleLayeredTexture_vs2022.vcxproj +++ b/Samples/0_Introduction/simpleLayeredTexture/simpleLayeredTexture_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleLayeredTexture.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleMPI/Makefile b/Samples/0_Introduction/simpleMPI/Makefile index 49fc56c3..8726e03d 100644 --- a/Samples/0_Introduction/simpleMPI/Makefile +++ b/Samples/0_Introduction/simpleMPI/Makefile @@ -335,9 +335,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/simpleMPI/README.md b/Samples/0_Introduction/simpleMPI/README.md index 6f56a03d..5e0f97fa 100644 --- a/Samples/0_Introduction/simpleMPI/README.md +++ b/Samples/0_Introduction/simpleMPI/README.md @@ -10,7 +10,7 @@ CUDA Systems Integration, MPI, Multithreading ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 
](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMalloc, cudaGetLastError, cudaFree, cudaMemcpy +cudaMalloc, cudaGetLastError, cudaMemcpy, cudaFree ## Dependencies needed to build/run [MPI](../../../README.md#mpi) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/0_Introduction/simpleMPI/simpleMPI_vs2017.vcxproj b/Samples/0_Introduction/simpleMPI/simpleMPI_vs2017.vcxproj index 4e1777bc..94e77612 100644 --- a/Samples/0_Introduction/simpleMPI/simpleMPI_vs2017.vcxproj +++ b/Samples/0_Introduction/simpleMPI/simpleMPI_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleMPI.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -108,6 +108,6 @@ - + diff --git a/Samples/0_Introduction/simpleMPI/simpleMPI_vs2019.vcxproj b/Samples/0_Introduction/simpleMPI/simpleMPI_vs2019.vcxproj index cf5e568b..97822220 100644 --- a/Samples/0_Introduction/simpleMPI/simpleMPI_vs2019.vcxproj +++ b/Samples/0_Introduction/simpleMPI/simpleMPI_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleMPI.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/0_Introduction/simpleMPI/simpleMPI_vs2022.vcxproj b/Samples/0_Introduction/simpleMPI/simpleMPI_vs2022.vcxproj index 2959c87e..8f6ea5ae 100644 --- a/Samples/0_Introduction/simpleMPI/simpleMPI_vs2022.vcxproj +++ b/Samples/0_Introduction/simpleMPI/simpleMPI_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleMPI.exe - 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/0_Introduction/simpleMultiCopy/Makefile b/Samples/0_Introduction/simpleMultiCopy/Makefile index 26974b35..d6d253c6 100644 --- a/Samples/0_Introduction/simpleMultiCopy/Makefile +++ b/Samples/0_Introduction/simpleMultiCopy/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/simpleMultiCopy/NsightEclipse.xml b/Samples/0_Introduction/simpleMultiCopy/NsightEclipse.xml index ca79562c..bb76ce8c 100644 --- a/Samples/0_Introduction/simpleMultiCopy/NsightEclipse.xml +++ b/Samples/0_Introduction/simpleMultiCopy/NsightEclipse.xml @@ -3,22 +3,22 @@ simpleMultiCopy - cudaMemset - cudaFree - cudaStreamDestroy - cudaEventRecord - cudaStreamCreate cudaHostAlloc - cudaEventCreate - cudaEventElapsedTime - cudaDeviceSynchronize - cudaEventSynchronize - cudaFreeHost + cudaStreamDestroy cudaMalloc - cudaEventDestroy - cudaSetDevice cudaMemcpyAsync + cudaFree + cudaSetDevice + cudaEventSynchronize + cudaDeviceSynchronize + cudaEventRecord + cudaFreeHost + cudaMemset + cudaEventDestroy + cudaEventElapsedTime + cudaStreamCreate cudaGetDeviceProperties + cudaEventCreate whole @@ -66,6 +66,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/0_Introduction/simpleMultiCopy/README.md b/Samples/0_Introduction/simpleMultiCopy/README.md index 
8f015c26..72404287 100644 --- a/Samples/0_Introduction/simpleMultiCopy/README.md +++ b/Samples/0_Introduction/simpleMultiCopy/README.md @@ -10,7 +10,7 @@ CUDA Streams and Events, Asynchronous Data Transfers, Overlap Compute and Copy, ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemset, cudaFree, cudaStreamDestroy, cudaEventRecord, cudaStreamCreate, cudaHostAlloc, cudaEventCreate, cudaEventElapsedTime, cudaDeviceSynchronize, cudaEventSynchronize, cudaFreeHost, cudaMalloc, cudaEventDestroy, 
cudaSetDevice, cudaMemcpyAsync, cudaGetDeviceProperties +cudaHostAlloc, cudaStreamDestroy, cudaMalloc, cudaMemcpyAsync, cudaFree, cudaSetDevice, cudaEventSynchronize, cudaDeviceSynchronize, cudaEventRecord, cudaFreeHost, cudaMemset, cudaEventDestroy, cudaEventElapsedTime, cudaStreamCreate, cudaGetDeviceProperties, cudaEventCreate ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/0_Introduction/simpleMultiCopy/simpleMultiCopy_vs2017.vcxproj b/Samples/0_Introduction/simpleMultiCopy/simpleMultiCopy_vs2017.vcxproj index 86ccf67a..8fbcf08c 100644 --- a/Samples/0_Introduction/simpleMultiCopy/simpleMultiCopy_vs2017.vcxproj +++ b/Samples/0_Introduction/simpleMultiCopy/simpleMultiCopy_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleMultiCopy.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/simpleMultiCopy/simpleMultiCopy_vs2019.vcxproj b/Samples/0_Introduction/simpleMultiCopy/simpleMultiCopy_vs2019.vcxproj index d1a1609f..1f77866f 100644 --- a/Samples/0_Introduction/simpleMultiCopy/simpleMultiCopy_vs2019.vcxproj +++ b/Samples/0_Introduction/simpleMultiCopy/simpleMultiCopy_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleMultiCopy.exe - 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleMultiCopy/simpleMultiCopy_vs2022.vcxproj b/Samples/0_Introduction/simpleMultiCopy/simpleMultiCopy_vs2022.vcxproj index 5251d592..447b6331 100644 --- a/Samples/0_Introduction/simpleMultiCopy/simpleMultiCopy_vs2022.vcxproj +++ b/Samples/0_Introduction/simpleMultiCopy/simpleMultiCopy_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleMultiCopy.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleMultiGPU/Makefile b/Samples/0_Introduction/simpleMultiGPU/Makefile index 6db255e4..15d13dde 100644 --- a/Samples/0_Introduction/simpleMultiGPU/Makefile +++ b/Samples/0_Introduction/simpleMultiGPU/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/simpleMultiGPU/NsightEclipse.xml b/Samples/0_Introduction/simpleMultiGPU/NsightEclipse.xml index 500fc9ea..a1e377e5 100644 
--- a/Samples/0_Introduction/simpleMultiGPU/NsightEclipse.xml +++ b/Samples/0_Introduction/simpleMultiGPU/NsightEclipse.xml @@ -6,13 +6,13 @@ cudaStreamDestroy cudaFree cudaMallocHost + cudaSetDevice + cudaFreeHost + cudaStreamSynchronize + cudaMalloc + cudaMemcpyAsync cudaStreamCreate cudaGetDeviceCount - cudaFreeHost - cudaMalloc - cudaSetDevice - cudaStreamSynchronize - cudaMemcpyAsync whole @@ -53,6 +53,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/0_Introduction/simpleMultiGPU/README.md b/Samples/0_Introduction/simpleMultiGPU/README.md index 0f8464c2..284904f8 100644 --- a/Samples/0_Introduction/simpleMultiGPU/README.md +++ b/Samples/0_Introduction/simpleMultiGPU/README.md @@ -10,7 +10,7 @@ Asynchronous Data Transfers, CUDA Streams and Events, Multithreading, Multi-GPU ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 
](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaStreamDestroy, cudaFree, cudaMallocHost, cudaStreamCreate, cudaGetDeviceCount, cudaFreeHost, cudaMalloc, cudaSetDevice, cudaStreamSynchronize, cudaMemcpyAsync +cudaStreamDestroy, cudaFree, cudaMallocHost, cudaSetDevice, cudaFreeHost, cudaStreamSynchronize, cudaMalloc, cudaMemcpyAsync, cudaStreamCreate, cudaGetDeviceCount ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/0_Introduction/simpleMultiGPU/simpleMultiGPU_vs2017.vcxproj b/Samples/0_Introduction/simpleMultiGPU/simpleMultiGPU_vs2017.vcxproj index bcc574be..a025b2ec 100644 --- a/Samples/0_Introduction/simpleMultiGPU/simpleMultiGPU_vs2017.vcxproj +++ b/Samples/0_Introduction/simpleMultiGPU/simpleMultiGPU_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleMultiGPU.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/simpleMultiGPU/simpleMultiGPU_vs2019.vcxproj b/Samples/0_Introduction/simpleMultiGPU/simpleMultiGPU_vs2019.vcxproj index 30a6f199..2a6ce253 100644 --- 
a/Samples/0_Introduction/simpleMultiGPU/simpleMultiGPU_vs2019.vcxproj +++ b/Samples/0_Introduction/simpleMultiGPU/simpleMultiGPU_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleMultiGPU.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleMultiGPU/simpleMultiGPU_vs2022.vcxproj b/Samples/0_Introduction/simpleMultiGPU/simpleMultiGPU_vs2022.vcxproj index 6fd4139c..315059ca 100644 --- a/Samples/0_Introduction/simpleMultiGPU/simpleMultiGPU_vs2022.vcxproj +++ b/Samples/0_Introduction/simpleMultiGPU/simpleMultiGPU_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleMultiGPU.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleOccupancy/Makefile b/Samples/0_Introduction/simpleOccupancy/Makefile index 85aa3c9f..b735ec0c 100644 --- a/Samples/0_Introduction/simpleOccupancy/Makefile +++ b/Samples/0_Introduction/simpleOccupancy/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 
50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/simpleOccupancy/NsightEclipse.xml b/Samples/0_Introduction/simpleOccupancy/NsightEclipse.xml index 34577846..e4383b1c 100644 --- a/Samples/0_Introduction/simpleOccupancy/NsightEclipse.xml +++ b/Samples/0_Introduction/simpleOccupancy/NsightEclipse.xml @@ -3,17 +3,17 @@ simpleOccupancy - cudaFree - cudaEventRecord - cudaOccupancyMaxActiveBlocksPerMultiprocessor - cudaEventCreate - cudaOccupancyMaxPotentialBlockSize - cudaEventElapsedTime - cudaDeviceSynchronize - cudaMalloc cudaMemcpy - cudaGetDeviceProperties + cudaFree + cudaDeviceSynchronize + cudaEventRecord cudaGetDevice + cudaMalloc + cudaEventElapsedTime + cudaOccupancyMaxActiveBlocksPerMultiprocessor + cudaGetDeviceProperties + cudaOccupancyMaxPotentialBlockSize + cudaEventCreate whole @@ -52,6 +52,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/0_Introduction/simpleOccupancy/README.md b/Samples/0_Introduction/simpleOccupancy/README.md index 4ca96acf..ddc12f2a 100644 --- a/Samples/0_Introduction/simpleOccupancy/README.md +++ b/Samples/0_Introduction/simpleOccupancy/README.md @@ -10,7 +10,7 @@ Occupancy Calculator ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 
](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaEventRecord, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaEventCreate, cudaOccupancyMaxPotentialBlockSize, cudaEventElapsedTime, cudaDeviceSynchronize, cudaMalloc, cudaMemcpy, cudaGetDeviceProperties, cudaGetDevice +cudaMemcpy, cudaFree, cudaDeviceSynchronize, cudaEventRecord, cudaGetDevice, cudaMalloc, cudaEventElapsedTime, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaGetDeviceProperties, cudaOccupancyMaxPotentialBlockSize, cudaEventCreate ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/0_Introduction/simpleOccupancy/simpleOccupancy_vs2017.vcxproj b/Samples/0_Introduction/simpleOccupancy/simpleOccupancy_vs2017.vcxproj index ee3e8ca5..d4d97a02 100644 --- a/Samples/0_Introduction/simpleOccupancy/simpleOccupancy_vs2017.vcxproj +++ b/Samples/0_Introduction/simpleOccupancy/simpleOccupancy_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleOccupancy.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/simpleOccupancy/simpleOccupancy_vs2019.vcxproj b/Samples/0_Introduction/simpleOccupancy/simpleOccupancy_vs2019.vcxproj index a0db9b8e..096cea4a 100644 --- a/Samples/0_Introduction/simpleOccupancy/simpleOccupancy_vs2019.vcxproj +++ b/Samples/0_Introduction/simpleOccupancy/simpleOccupancy_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleOccupancy.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleOccupancy/simpleOccupancy_vs2022.vcxproj b/Samples/0_Introduction/simpleOccupancy/simpleOccupancy_vs2022.vcxproj index 312b5e69..57de8a55 100644 --- a/Samples/0_Introduction/simpleOccupancy/simpleOccupancy_vs2022.vcxproj +++ 
b/Samples/0_Introduction/simpleOccupancy/simpleOccupancy_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleOccupancy.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleP2P/Makefile b/Samples/0_Introduction/simpleP2P/Makefile index 036ff0d8..804aa449 100644 --- a/Samples/0_Introduction/simpleP2P/Makefile +++ b/Samples/0_Introduction/simpleP2P/Makefile @@ -303,9 +303,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/simpleP2P/NsightEclipse.xml b/Samples/0_Introduction/simpleP2P/NsightEclipse.xml index 69fc274d..65fe83bb 100644 --- a/Samples/0_Introduction/simpleP2P/NsightEclipse.xml +++ b/Samples/0_Introduction/simpleP2P/NsightEclipse.xml @@ -3,23 +3,23 @@ simpleP2P - cudaDeviceEnablePeerAccess - cudaFree - cudaEventRecord - cudaMallocHost - cudaGetDeviceCount - cudaEventElapsedTime - cudaDeviceSynchronize - cudaEventSynchronize - cudaFreeHost - cudaMalloc - cudaEventCreateWithFlags - cudaDeviceCanAccessPeer - cudaEventDestroy - cudaSetDevice - cudaDeviceDisablePeerAccess cudaMemcpy + cudaMalloc + cudaFree + cudaMallocHost + cudaEventCreateWithFlags + cudaSetDevice + cudaEventSynchronize + cudaDeviceDisablePeerAccess + cudaGetDeviceCount + cudaDeviceSynchronize + cudaEventRecord + cudaFreeHost cudaGetDeviceProperties + cudaDeviceEnablePeerAccess + 
cudaEventDestroy + cudaEventElapsedTime + cudaDeviceCanAccessPeer whole @@ -67,6 +67,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/0_Introduction/simpleP2P/README.md b/Samples/0_Introduction/simpleP2P/README.md index cbe3b252..56b4b8bf 100644 --- a/Samples/0_Introduction/simpleP2P/README.md +++ b/Samples/0_Introduction/simpleP2P/README.md @@ -10,7 +10,7 @@ Performance Strategies, Asynchronous Data Transfers, Unified Virtual Address Spa ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) 
-cudaDeviceEnablePeerAccess, cudaFree, cudaEventRecord, cudaMallocHost, cudaGetDeviceCount, cudaEventElapsedTime, cudaDeviceSynchronize, cudaEventSynchronize, cudaFreeHost, cudaMalloc, cudaEventCreateWithFlags, cudaDeviceCanAccessPeer, cudaEventDestroy, cudaSetDevice, cudaDeviceDisablePeerAccess, cudaMemcpy, cudaGetDeviceProperties +cudaMemcpy, cudaMalloc, cudaFree, cudaMallocHost, cudaEventCreateWithFlags, cudaSetDevice, cudaEventSynchronize, cudaDeviceDisablePeerAccess, cudaGetDeviceCount, cudaDeviceSynchronize, cudaEventRecord, cudaFreeHost, cudaGetDeviceProperties, cudaDeviceEnablePeerAccess, cudaEventDestroy, cudaEventElapsedTime, cudaDeviceCanAccessPeer ## Dependencies needed to build/run [only-64-bit](../../../README.md#only-64-bit) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/0_Introduction/simpleP2P/simpleP2P_vs2017.vcxproj b/Samples/0_Introduction/simpleP2P/simpleP2P_vs2017.vcxproj index aea119d9..41efff17 100644 --- a/Samples/0_Introduction/simpleP2P/simpleP2P_vs2017.vcxproj +++ b/Samples/0_Introduction/simpleP2P/simpleP2P_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleP2P.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/simpleP2P/simpleP2P_vs2019.vcxproj b/Samples/0_Introduction/simpleP2P/simpleP2P_vs2019.vcxproj index af3b8074..d51f6d7b 100644 --- a/Samples/0_Introduction/simpleP2P/simpleP2P_vs2019.vcxproj +++ b/Samples/0_Introduction/simpleP2P/simpleP2P_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleP2P.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleP2P/simpleP2P_vs2022.vcxproj b/Samples/0_Introduction/simpleP2P/simpleP2P_vs2022.vcxproj index ea28f070..9ed32164 100644 --- a/Samples/0_Introduction/simpleP2P/simpleP2P_vs2022.vcxproj +++ b/Samples/0_Introduction/simpleP2P/simpleP2P_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleP2P.exe - 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simplePitchLinearTexture/Makefile b/Samples/0_Introduction/simplePitchLinearTexture/Makefile index 32d993f2..98218e83 100644 --- a/Samples/0_Introduction/simplePitchLinearTexture/Makefile +++ b/Samples/0_Introduction/simplePitchLinearTexture/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/simplePitchLinearTexture/NsightEclipse.xml b/Samples/0_Introduction/simplePitchLinearTexture/NsightEclipse.xml index e1082063..be7882b8 100644 --- a/Samples/0_Introduction/simplePitchLinearTexture/NsightEclipse.xml +++ b/Samples/0_Introduction/simplePitchLinearTexture/NsightEclipse.xml @@ -3,18 +3,18 @@ simplePitchLinearTexture - cudaFree - cudaMemcpyToArray - cudaEventRecord + cudaMallocArray cudaFreeArray - cudaEventCreate - cudaEventElapsedTime + cudaFree + cudaMallocPitch cudaDestroyTextureObject cudaEventSynchronize - cudaMallocPitch + cudaMemcpyToArray + cudaEventRecord cudaCreateTextureObject cudaEventDestroy - cudaMallocArray + cudaEventElapsedTime + cudaEventCreate whole @@ -54,6 +54,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/0_Introduction/simplePitchLinearTexture/README.md b/Samples/0_Introduction/simplePitchLinearTexture/README.md index dc437cf2..95944a88 100644 --- 
a/Samples/0_Introduction/simplePitchLinearTexture/README.md +++ b/Samples/0_Introduction/simplePitchLinearTexture/README.md @@ -10,7 +10,7 @@ Texture, Image Processing ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaMemcpyToArray, cudaEventRecord, cudaFreeArray, cudaEventCreate, cudaEventElapsedTime, cudaDestroyTextureObject, cudaEventSynchronize, cudaMallocPitch, cudaCreateTextureObject, cudaEventDestroy, cudaMallocArray +cudaMallocArray, cudaFreeArray, cudaFree, cudaMallocPitch, 
cudaDestroyTextureObject, cudaEventSynchronize, cudaMemcpyToArray, cudaEventRecord, cudaCreateTextureObject, cudaEventDestroy, cudaEventElapsedTime, cudaEventCreate ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/0_Introduction/simplePitchLinearTexture/simplePitchLinearTexture_vs2017.vcxproj b/Samples/0_Introduction/simplePitchLinearTexture/simplePitchLinearTexture_vs2017.vcxproj index ae30718f..f33a061e 100644 --- a/Samples/0_Introduction/simplePitchLinearTexture/simplePitchLinearTexture_vs2017.vcxproj +++ b/Samples/0_Introduction/simplePitchLinearTexture/simplePitchLinearTexture_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simplePitchLinearTexture.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/simplePitchLinearTexture/simplePitchLinearTexture_vs2019.vcxproj b/Samples/0_Introduction/simplePitchLinearTexture/simplePitchLinearTexture_vs2019.vcxproj index 60bf7a61..c08e4de3 100644 --- a/Samples/0_Introduction/simplePitchLinearTexture/simplePitchLinearTexture_vs2019.vcxproj +++ b/Samples/0_Introduction/simplePitchLinearTexture/simplePitchLinearTexture_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simplePitchLinearTexture.exe - 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simplePitchLinearTexture/simplePitchLinearTexture_vs2022.vcxproj b/Samples/0_Introduction/simplePitchLinearTexture/simplePitchLinearTexture_vs2022.vcxproj index e964badc..441ae1c9 100644 --- a/Samples/0_Introduction/simplePitchLinearTexture/simplePitchLinearTexture_vs2022.vcxproj +++ b/Samples/0_Introduction/simplePitchLinearTexture/simplePitchLinearTexture_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simplePitchLinearTexture.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simplePrintf/Makefile b/Samples/0_Introduction/simplePrintf/Makefile index e1297aa6..3b8cf8a0 100644 --- a/Samples/0_Introduction/simplePrintf/Makefile +++ b/Samples/0_Introduction/simplePrintf/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/simplePrintf/README.md 
b/Samples/0_Introduction/simplePrintf/README.md index fd29cf26..872faf89 100644 --- a/Samples/0_Introduction/simplePrintf/README.md +++ b/Samples/0_Introduction/simplePrintf/README.md @@ -10,7 +10,7 @@ Debugging ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaDeviceSynchronize, cudaGetDeviceProperties, cudaGetDevice +cudaGetDeviceProperties, cudaDeviceSynchronize, cudaGetDevice ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for 
your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/0_Introduction/simplePrintf/simplePrintf_vs2017.vcxproj b/Samples/0_Introduction/simplePrintf/simplePrintf_vs2017.vcxproj index e06b0f93..861f30b9 100644 --- a/Samples/0_Introduction/simplePrintf/simplePrintf_vs2017.vcxproj +++ b/Samples/0_Introduction/simplePrintf/simplePrintf_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simplePrintf.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/simplePrintf/simplePrintf_vs2019.vcxproj b/Samples/0_Introduction/simplePrintf/simplePrintf_vs2019.vcxproj index 84c7cfbb..6dcb3c5e 100644 --- a/Samples/0_Introduction/simplePrintf/simplePrintf_vs2019.vcxproj +++ b/Samples/0_Introduction/simplePrintf/simplePrintf_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simplePrintf.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simplePrintf/simplePrintf_vs2022.vcxproj b/Samples/0_Introduction/simplePrintf/simplePrintf_vs2022.vcxproj index 039c5fb2..e45b5953 
100644 --- a/Samples/0_Introduction/simplePrintf/simplePrintf_vs2022.vcxproj +++ b/Samples/0_Introduction/simplePrintf/simplePrintf_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simplePrintf.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleSeparateCompilation/Makefile b/Samples/0_Introduction/simpleSeparateCompilation/Makefile index 59116ae7..4a92e480 100644 --- a/Samples/0_Introduction/simpleSeparateCompilation/Makefile +++ b/Samples/0_Introduction/simpleSeparateCompilation/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/simpleSeparateCompilation/NsightEclipse.xml b/Samples/0_Introduction/simpleSeparateCompilation/NsightEclipse.xml index 586ab477..2f1c15ab 100644 --- a/Samples/0_Introduction/simpleSeparateCompilation/NsightEclipse.xml +++ b/Samples/0_Introduction/simpleSeparateCompilation/NsightEclipse.xml @@ -12,11 +12,11 @@ - cudaFree - cudaMalloc - cudaGetLastError cudaMemcpy cudaMemcpyFromSymbol + cudaFree + cudaGetLastError + cudaMalloc separate @@ -56,6 +56,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/0_Introduction/simpleSeparateCompilation/README.md b/Samples/0_Introduction/simpleSeparateCompilation/README.md index b83c4f5c..a4b54487 100644 --- a/Samples/0_Introduction/simpleSeparateCompilation/README.md +++ 
b/Samples/0_Introduction/simpleSeparateCompilation/README.md @@ -10,7 +10,7 @@ Separate Compilation ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaMalloc, cudaGetLastError, cudaMemcpy, cudaMemcpyFromSymbol +cudaMemcpy, cudaMemcpyFromSymbol, cudaFree, cudaGetLastError, cudaMalloc ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/0_Introduction/simpleSeparateCompilation/simpleSeparateCompilation_vs2017.vcxproj b/Samples/0_Introduction/simpleSeparateCompilation/simpleSeparateCompilation_vs2017.vcxproj index f8293c63..fc05d0f3 100644 --- a/Samples/0_Introduction/simpleSeparateCompilation/simpleSeparateCompilation_vs2017.vcxproj +++ b/Samples/0_Introduction/simpleSeparateCompilation/simpleSeparateCompilation_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleSeparateCompilation.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -109,6 +109,6 @@ - + diff --git a/Samples/0_Introduction/simpleSeparateCompilation/simpleSeparateCompilation_vs2019.vcxproj b/Samples/0_Introduction/simpleSeparateCompilation/simpleSeparateCompilation_vs2019.vcxproj index db4e0716..d6f50cf4 100644 --- a/Samples/0_Introduction/simpleSeparateCompilation/simpleSeparateCompilation_vs2019.vcxproj +++ b/Samples/0_Introduction/simpleSeparateCompilation/simpleSeparateCompilation_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleSeparateCompilation.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 
./;../../../Common WIN32 @@ -105,6 +105,6 @@ - + diff --git a/Samples/0_Introduction/simpleSeparateCompilation/simpleSeparateCompilation_vs2022.vcxproj b/Samples/0_Introduction/simpleSeparateCompilation/simpleSeparateCompilation_vs2022.vcxproj index 07e55c2b..758766ef 100644 --- a/Samples/0_Introduction/simpleSeparateCompilation/simpleSeparateCompilation_vs2022.vcxproj +++ b/Samples/0_Introduction/simpleSeparateCompilation/simpleSeparateCompilation_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleSeparateCompilation.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -105,6 +105,6 @@ - + diff --git a/Samples/0_Introduction/simpleStreams/Makefile b/Samples/0_Introduction/simpleStreams/Makefile index 0c088686..0e83a307 100644 --- a/Samples/0_Introduction/simpleStreams/Makefile +++ b/Samples/0_Introduction/simpleStreams/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/simpleStreams/NsightEclipse.xml b/Samples/0_Introduction/simpleStreams/NsightEclipse.xml index b2b5aff6..d7fb6d38 100644 --- a/Samples/0_Introduction/simpleStreams/NsightEclipse.xml +++ b/Samples/0_Introduction/simpleStreams/NsightEclipse.xml @@ -4,25 +4,25 @@ simpleStreams cudaMemcpy - cudaMemcpyAsync - cudaStreamDestroy - cudaMallocHost - cudaHostAlloc - cudaHostRegister - cudaMalloc - cudaEventCreateWithFlags - cudaEventDestroy 
cudaSetDeviceFlags cudaSetDevice - cudaGetDeviceProperties - cudaGetDeviceCount - cudaEventElapsedTime - cudaMemset - cudaFree - cudaEventRecord + cudaEventDestroy cudaStreamCreate - cudaEventSynchronize + cudaMallocHost + cudaEventCreateWithFlags cudaFreeHost + cudaMemcpyAsync + cudaGetDeviceCount + cudaStreamDestroy + cudaMemset + cudaEventElapsedTime + cudaHostAlloc + cudaFree + cudaHostRegister + cudaEventSynchronize + cudaEventRecord + cudaMalloc + cudaGetDeviceProperties cudaHostUnregister @@ -61,6 +61,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/0_Introduction/simpleStreams/README.md b/Samples/0_Introduction/simpleStreams/README.md index 7c33b907..a9de18f8 100644 --- a/Samples/0_Introduction/simpleStreams/README.md +++ b/Samples/0_Introduction/simpleStreams/README.md @@ -10,7 +10,7 @@ Asynchronous Data Transfers, CUDA Streams and Events ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) 
[SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemcpy, cudaMemcpyAsync, cudaStreamDestroy, cudaMallocHost, cudaHostAlloc, cudaHostRegister, cudaMalloc, cudaEventCreateWithFlags, cudaEventDestroy, cudaSetDeviceFlags, cudaSetDevice, cudaGetDeviceProperties, cudaGetDeviceCount, cudaEventElapsedTime, cudaMemset, cudaFree, cudaEventRecord, cudaStreamCreate, cudaEventSynchronize, cudaFreeHost, cudaHostUnregister +cudaMemcpy, cudaSetDeviceFlags, cudaSetDevice, cudaEventDestroy, cudaStreamCreate, cudaMallocHost, cudaEventCreateWithFlags, cudaFreeHost, cudaMemcpyAsync, cudaGetDeviceCount, cudaStreamDestroy, cudaMemset, cudaEventElapsedTime, cudaHostAlloc, cudaFree, cudaHostRegister, cudaEventSynchronize, cudaEventRecord, cudaMalloc, cudaGetDeviceProperties, cudaHostUnregister ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/0_Introduction/simpleStreams/simpleStreams_vs2017.vcxproj b/Samples/0_Introduction/simpleStreams/simpleStreams_vs2017.vcxproj index 1aae760f..b0fc51da 100644 --- a/Samples/0_Introduction/simpleStreams/simpleStreams_vs2017.vcxproj +++ b/Samples/0_Introduction/simpleStreams/simpleStreams_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleStreams.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/simpleStreams/simpleStreams_vs2019.vcxproj b/Samples/0_Introduction/simpleStreams/simpleStreams_vs2019.vcxproj index f950cfd8..6b96b6b6 100644 --- a/Samples/0_Introduction/simpleStreams/simpleStreams_vs2019.vcxproj +++ b/Samples/0_Introduction/simpleStreams/simpleStreams_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleStreams.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleStreams/simpleStreams_vs2022.vcxproj b/Samples/0_Introduction/simpleStreams/simpleStreams_vs2022.vcxproj index 58020040..cf0c0e4d 100644 --- a/Samples/0_Introduction/simpleStreams/simpleStreams_vs2022.vcxproj +++ 
b/Samples/0_Introduction/simpleStreams/simpleStreams_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleStreams.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleSurfaceWrite/Makefile b/Samples/0_Introduction/simpleSurfaceWrite/Makefile index 024e0d4c..7440eee7 100644 --- a/Samples/0_Introduction/simpleSurfaceWrite/Makefile +++ b/Samples/0_Introduction/simpleSurfaceWrite/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/simpleSurfaceWrite/NsightEclipse.xml b/Samples/0_Introduction/simpleSurfaceWrite/NsightEclipse.xml index 182fad40..4d9153cd 100644 --- a/Samples/0_Introduction/simpleSurfaceWrite/NsightEclipse.xml +++ b/Samples/0_Introduction/simpleSurfaceWrite/NsightEclipse.xml @@ -6,18 +6,18 @@ output.pgm - cudaFree - cudaMemcpyToArray + cudaMemcpy + cudaCreateChannelDesc + cudaMallocArray cudaFreeArray - cudaDestroySurfaceObject - cudaCreateSurfaceObject + cudaFree cudaDestroyTextureObject + cudaMemcpyToArray + cudaDestroySurfaceObject cudaDeviceSynchronize + cudaCreateSurfaceObject cudaCreateTextureObject cudaMalloc - cudaMallocArray - cudaCreateChannelDesc - cudaMemcpy cudaGetDeviceProperties @@ -61,6 +61,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/0_Introduction/simpleSurfaceWrite/README.md 
b/Samples/0_Introduction/simpleSurfaceWrite/README.md index a93a9d6a..944cf2fc 100644 --- a/Samples/0_Introduction/simpleSurfaceWrite/README.md +++ b/Samples/0_Introduction/simpleSurfaceWrite/README.md @@ -10,7 +10,7 @@ Texture, Surface Writes, Image Processing ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaMemcpyToArray, cudaFreeArray, cudaDestroySurfaceObject, cudaCreateSurfaceObject, cudaDestroyTextureObject, cudaDeviceSynchronize, cudaCreateTextureObject, cudaMalloc, 
cudaMallocArray, cudaCreateChannelDesc, cudaMemcpy, cudaGetDeviceProperties +cudaMemcpy, cudaCreateChannelDesc, cudaMallocArray, cudaFreeArray, cudaFree, cudaDestroyTextureObject, cudaMemcpyToArray, cudaDestroySurfaceObject, cudaDeviceSynchronize, cudaCreateSurfaceObject, cudaCreateTextureObject, cudaMalloc, cudaGetDeviceProperties ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/0_Introduction/simpleSurfaceWrite/simpleSurfaceWrite_vs2017.vcxproj b/Samples/0_Introduction/simpleSurfaceWrite/simpleSurfaceWrite_vs2017.vcxproj index c076e271..8a65d23a 100644 --- a/Samples/0_Introduction/simpleSurfaceWrite/simpleSurfaceWrite_vs2017.vcxproj +++ b/Samples/0_Introduction/simpleSurfaceWrite/simpleSurfaceWrite_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleSurfaceWrite.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/simpleSurfaceWrite/simpleSurfaceWrite_vs2019.vcxproj b/Samples/0_Introduction/simpleSurfaceWrite/simpleSurfaceWrite_vs2019.vcxproj index c9fdbc59..d61b8689 100644 --- a/Samples/0_Introduction/simpleSurfaceWrite/simpleSurfaceWrite_vs2019.vcxproj +++ b/Samples/0_Introduction/simpleSurfaceWrite/simpleSurfaceWrite_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleSurfaceWrite.exe - 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleSurfaceWrite/simpleSurfaceWrite_vs2022.vcxproj b/Samples/0_Introduction/simpleSurfaceWrite/simpleSurfaceWrite_vs2022.vcxproj index 1b8e29b3..d5275f35 100644 --- a/Samples/0_Introduction/simpleSurfaceWrite/simpleSurfaceWrite_vs2022.vcxproj +++ b/Samples/0_Introduction/simpleSurfaceWrite/simpleSurfaceWrite_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleSurfaceWrite.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleTemplates/Makefile b/Samples/0_Introduction/simpleTemplates/Makefile index 78ad03d3..beac3d79 100644 --- a/Samples/0_Introduction/simpleTemplates/Makefile +++ b/Samples/0_Introduction/simpleTemplates/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/simpleTemplates/NsightEclipse.xml b/Samples/0_Introduction/simpleTemplates/NsightEclipse.xml 
index 343057ce..36149261 100644 --- a/Samples/0_Introduction/simpleTemplates/NsightEclipse.xml +++ b/Samples/0_Introduction/simpleTemplates/NsightEclipse.xml @@ -4,9 +4,9 @@ simpleTemplates cudaMalloc - cudaFree cudaMemcpy cudaGetDeviceProperties + cudaFree whole @@ -42,6 +42,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/0_Introduction/simpleTemplates/README.md b/Samples/0_Introduction/simpleTemplates/README.md index 83a67103..0db67151 100644 --- a/Samples/0_Introduction/simpleTemplates/README.md +++ b/Samples/0_Introduction/simpleTemplates/README.md @@ -10,7 +10,7 @@ C++ Templates ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMalloc, cudaFree, cudaMemcpy, cudaGetDeviceProperties +cudaMalloc, cudaMemcpy, cudaGetDeviceProperties, cudaFree ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/0_Introduction/simpleTemplates/simpleTemplates_vs2017.vcxproj b/Samples/0_Introduction/simpleTemplates/simpleTemplates_vs2017.vcxproj index b78a4c18..96cca985 100644 --- a/Samples/0_Introduction/simpleTemplates/simpleTemplates_vs2017.vcxproj +++ b/Samples/0_Introduction/simpleTemplates/simpleTemplates_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleTemplates.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/simpleTemplates/simpleTemplates_vs2019.vcxproj b/Samples/0_Introduction/simpleTemplates/simpleTemplates_vs2019.vcxproj index 9c5ec6d8..93969038 100644 --- a/Samples/0_Introduction/simpleTemplates/simpleTemplates_vs2019.vcxproj +++ b/Samples/0_Introduction/simpleTemplates/simpleTemplates_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleTemplates.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleTemplates/simpleTemplates_vs2022.vcxproj b/Samples/0_Introduction/simpleTemplates/simpleTemplates_vs2022.vcxproj index 0929e9be..90b222be 100644 --- a/Samples/0_Introduction/simpleTemplates/simpleTemplates_vs2022.vcxproj +++ b/Samples/0_Introduction/simpleTemplates/simpleTemplates_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleTemplates.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleTemplates_nvrtc/README.md b/Samples/0_Introduction/simpleTemplates_nvrtc/README.md index ae309261..31c588ee 100644 --- a/Samples/0_Introduction/simpleTemplates_nvrtc/README.md +++ b/Samples/0_Introduction/simpleTemplates_nvrtc/README.md @@ -10,7 +10,7 @@ C++ Templates, Runtime Compilation ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 
](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64 ## CUDA APIs involved ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html) -cuModuleGetFunction, cuMemAlloc, cuLaunchKernel, cuCtxSynchronize, cuMemFree, cuMemcpyDtoH, cuMemcpyHtoD +cuMemcpyDtoH, cuLaunchKernel, cuMemcpyHtoD, cuCtxSynchronize, cuMemAlloc, cuMemFree, cuModuleGetFunction ## Dependencies needed to build/run [NVRTC](../../../README.md#nvrtc) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/0_Introduction/simpleTemplates_nvrtc/simpleTemplates_nvrtc_vs2017.vcxproj b/Samples/0_Introduction/simpleTemplates_nvrtc/simpleTemplates_nvrtc_vs2017.vcxproj index 0c2dfa96..1655b536 100644 --- a/Samples/0_Introduction/simpleTemplates_nvrtc/simpleTemplates_nvrtc_vs2017.vcxproj +++ b/Samples/0_Introduction/simpleTemplates_nvrtc/simpleTemplates_nvrtc_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -108,6 +108,6 @@ - + diff --git a/Samples/0_Introduction/simpleTemplates_nvrtc/simpleTemplates_nvrtc_vs2019.vcxproj b/Samples/0_Introduction/simpleTemplates_nvrtc/simpleTemplates_nvrtc_vs2019.vcxproj index 08f3653a..6d28d7aa 100644 --- a/Samples/0_Introduction/simpleTemplates_nvrtc/simpleTemplates_nvrtc_vs2019.vcxproj +++ b/Samples/0_Introduction/simpleTemplates_nvrtc/simpleTemplates_nvrtc_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -104,6 +104,6 @@ - + diff --git a/Samples/0_Introduction/simpleTemplates_nvrtc/simpleTemplates_nvrtc_vs2022.vcxproj b/Samples/0_Introduction/simpleTemplates_nvrtc/simpleTemplates_nvrtc_vs2022.vcxproj index c139a274..0c8ae969 100644 --- a/Samples/0_Introduction/simpleTemplates_nvrtc/simpleTemplates_nvrtc_vs2022.vcxproj +++ b/Samples/0_Introduction/simpleTemplates_nvrtc/simpleTemplates_nvrtc_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -104,6 +104,6 @@ - + diff --git a/Samples/0_Introduction/simpleTexture/Makefile b/Samples/0_Introduction/simpleTexture/Makefile index 046207e9..e705cef8 100644 --- a/Samples/0_Introduction/simpleTexture/Makefile +++ b/Samples/0_Introduction/simpleTexture/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/simpleTexture/NsightEclipse.xml b/Samples/0_Introduction/simpleTexture/NsightEclipse.xml index 
a4fbab4b..0f029aea 100644 --- a/Samples/0_Introduction/simpleTexture/NsightEclipse.xml +++ b/Samples/0_Introduction/simpleTexture/NsightEclipse.xml @@ -6,16 +6,16 @@ ./data/teapot512_bw_out.pgm - cudaFree - cudaMemcpyToArray + cudaMemcpy + cudaCreateChannelDesc + cudaMallocArray cudaFreeArray - cudaDeviceSynchronize + cudaFree cudaDestroyTextureObject + cudaMemcpyToArray + cudaDeviceSynchronize cudaCreateTextureObject cudaMalloc - cudaMallocArray - cudaCreateChannelDesc - cudaMemcpy whole @@ -59,6 +59,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/0_Introduction/simpleTexture/README.md b/Samples/0_Introduction/simpleTexture/README.md index 8577f337..834d4ee8 100644 --- a/Samples/0_Introduction/simpleTexture/README.md +++ b/Samples/0_Introduction/simpleTexture/README.md @@ -10,7 +10,7 @@ CUDA Runtime API, Texture, Image Processing ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 
](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaMemcpyToArray, cudaFreeArray, cudaDeviceSynchronize, cudaDestroyTextureObject, cudaCreateTextureObject, cudaMalloc, cudaMallocArray, cudaCreateChannelDesc, cudaMemcpy +cudaMemcpy, cudaCreateChannelDesc, cudaMallocArray, cudaFreeArray, cudaFree, cudaDestroyTextureObject, cudaMemcpyToArray, cudaDeviceSynchronize, cudaCreateTextureObject, cudaMalloc ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/0_Introduction/simpleTexture/simpleTexture_vs2017.vcxproj b/Samples/0_Introduction/simpleTexture/simpleTexture_vs2017.vcxproj index 544d7b22..c12f6f17 100644 --- a/Samples/0_Introduction/simpleTexture/simpleTexture_vs2017.vcxproj +++ b/Samples/0_Introduction/simpleTexture/simpleTexture_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleTexture.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/simpleTexture/simpleTexture_vs2019.vcxproj b/Samples/0_Introduction/simpleTexture/simpleTexture_vs2019.vcxproj index 034f2890..6c76b4ab 100644 --- a/Samples/0_Introduction/simpleTexture/simpleTexture_vs2019.vcxproj +++ b/Samples/0_Introduction/simpleTexture/simpleTexture_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleTexture.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleTexture/simpleTexture_vs2022.vcxproj b/Samples/0_Introduction/simpleTexture/simpleTexture_vs2022.vcxproj index 04f2a44d..5562ce0b 100644 --- a/Samples/0_Introduction/simpleTexture/simpleTexture_vs2022.vcxproj +++ 
b/Samples/0_Introduction/simpleTexture/simpleTexture_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleTexture.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleTexture3D/Makefile b/Samples/0_Introduction/simpleTexture3D/Makefile index fa6da461..f232cfff 100644 --- a/Samples/0_Introduction/simpleTexture3D/Makefile +++ b/Samples/0_Introduction/simpleTexture3D/Makefile @@ -299,9 +299,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/simpleTexture3D/NsightEclipse.xml b/Samples/0_Introduction/simpleTexture3D/NsightEclipse.xml index de66d67b..56848594 100644 --- a/Samples/0_Introduction/simpleTexture3D/NsightEclipse.xml +++ b/Samples/0_Introduction/simpleTexture3D/NsightEclipse.xml @@ -3,20 +3,20 @@ simpleTexture3D - cudaFree - cudaGraphicsMapResources - cudaFreeArray - cudaGraphicsGLRegisterBuffer - cudaGraphicsResourceGetMappedPointer - cudaExtent - cudaDeviceSynchronize - cudaDestroyTextureObject - cudaPitchedPtr - cudaCreateTextureObject - cudaMalloc - cudaGraphicsUnregisterResource cudaGraphicsUnmapResources cudaMemcpy + cudaFreeArray + cudaFree + cudaPitchedPtr + cudaGraphicsResourceGetMappedPointer + cudaGraphicsMapResources + cudaDestroyTextureObject + cudaExtent + cudaDeviceSynchronize + cudaCreateTextureObject + cudaGraphicsUnregisterResource + cudaMalloc 
+ cudaGraphicsGLRegisterBuffer whole @@ -79,6 +79,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/0_Introduction/simpleTexture3D/README.md b/Samples/0_Introduction/simpleTexture3D/README.md index 7ea427e7..de889b8b 100644 --- a/Samples/0_Introduction/simpleTexture3D/README.md +++ b/Samples/0_Introduction/simpleTexture3D/README.md @@ -10,7 +10,7 @@ Graphics Interop, Image Processing, 3D Textures, Surface Writes ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaGraphicsMapResources, 
cudaFreeArray, cudaGraphicsGLRegisterBuffer, cudaGraphicsResourceGetMappedPointer, cudaExtent, cudaDeviceSynchronize, cudaDestroyTextureObject, cudaPitchedPtr, cudaCreateTextureObject, cudaMalloc, cudaGraphicsUnregisterResource, cudaGraphicsUnmapResources, cudaMemcpy +cudaGraphicsUnmapResources, cudaMemcpy, cudaFreeArray, cudaFree, cudaPitchedPtr, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDestroyTextureObject, cudaExtent, cudaDeviceSynchronize, cudaCreateTextureObject, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer ## Dependencies needed to build/run [X11](../../../README.md#x11), [GL](../../../README.md#gl) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/0_Introduction/simpleTexture3D/findgllib.mk b/Samples/0_Introduction/simpleTexture3D/findgllib.mk index f0a5c551..998fcf0f 100644 --- a/Samples/0_Introduction/simpleTexture3D/findgllib.mk +++ b/Samples/0_Introduction/simpleTexture3D/findgllib.mk @@ -53,11 +53,12 @@ endif ifeq ("$(TARGET_OS)","linux") # $(info) >> findgllib.mk -> LINUX path <<<) # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside - UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) - FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) - RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) - CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) + UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) + FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) 
+ RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) + CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux") DFLT_PATH ?= /usr/lib endif endif + ifeq ("$(SUSE)","0") GLPATH ?= /usr/X11R6/lib64 GLLINK ?= -L/usr/X11R6/lib64 DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(FEDORA)","0") + else GLPATH ?= /usr/lib64/nvidia GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif - ifeq ("$(RHEL)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(CENTOS)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - + # find libGL, libGLU GLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so -print 2>/dev/null) GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null) diff --git a/Samples/0_Introduction/simpleTexture3D/simpleTexture3D_vs2017.vcxproj b/Samples/0_Introduction/simpleTexture3D/simpleTexture3D_vs2017.vcxproj index 9fefb601..ed90a63b 100644 --- a/Samples/0_Introduction/simpleTexture3D/simpleTexture3D_vs2017.vcxproj +++ b/Samples/0_Introduction/simpleTexture3D/simpleTexture3D_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleTexture3D.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common 
WIN32 @@ -118,6 +118,6 @@ - + diff --git a/Samples/0_Introduction/simpleTexture3D/simpleTexture3D_vs2019.vcxproj b/Samples/0_Introduction/simpleTexture3D/simpleTexture3D_vs2019.vcxproj index 23518ada..be0fa981 100644 --- a/Samples/0_Introduction/simpleTexture3D/simpleTexture3D_vs2019.vcxproj +++ b/Samples/0_Introduction/simpleTexture3D/simpleTexture3D_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleTexture3D.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -114,6 +114,6 @@ - + diff --git a/Samples/0_Introduction/simpleTexture3D/simpleTexture3D_vs2022.vcxproj b/Samples/0_Introduction/simpleTexture3D/simpleTexture3D_vs2022.vcxproj index dd4ab2af..1dd427b0 100644 --- a/Samples/0_Introduction/simpleTexture3D/simpleTexture3D_vs2022.vcxproj +++ b/Samples/0_Introduction/simpleTexture3D/simpleTexture3D_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleTexture3D.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -114,6 +114,6 @@ - + diff --git a/Samples/0_Introduction/simpleTextureDrv/Makefile b/Samples/0_Introduction/simpleTextureDrv/Makefile index bca14838..95ff9ffe 100644 --- a/Samples/0_Introduction/simpleTextureDrv/Makefile +++ b/Samples/0_Introduction/simpleTextureDrv/Makefile @@ 
-283,9 +283,9 @@ FATBIN_FILE := simpleTexture_kernel${TARGET_SIZE}.fatbin # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(GENCODE_FLAGS),) diff --git a/Samples/0_Introduction/simpleTextureDrv/README.md b/Samples/0_Introduction/simpleTextureDrv/README.md index 3457bfbe..ee28ee7f 100644 --- a/Samples/0_Introduction/simpleTextureDrv/README.md +++ b/Samples/0_Introduction/simpleTextureDrv/README.md @@ -10,7 +10,7 @@ CUDA Driver API, Texture, Image Processing ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 
9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html) -cuModuleGetFunction, cuTexObjectDestroy, cuModuleLoadData, cuCtxCreate, cuArrayCreate, cuDeviceGetName, cuLaunchKernel, cuMemAlloc, cuCtxSynchronize, cuArrayDestroy, cuTexObjectCreate, cuMemFree, cuMemcpyDtoH, cuCtxDestroy, cuDeviceGetAttribute +cuMemcpyDtoH, cuLaunchKernel, cuModuleLoadData, cuDeviceGetName, cuDeviceGetAttribute, cuCtxSynchronize, cuMemAlloc, cuArrayCreate, cuMemFree, cuCtxDestroy, cuTexObjectDestroy, cuTexObjectCreate, cuCtxCreate, cuModuleGetFunction, cuArrayDestroy ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/0_Introduction/simpleTextureDrv/simpleTextureDrv_vs2017.vcxproj b/Samples/0_Introduction/simpleTextureDrv/simpleTextureDrv_vs2017.vcxproj index d674efc6..66dc7b33 100644 --- a/Samples/0_Introduction/simpleTextureDrv/simpleTextureDrv_vs2017.vcxproj +++ b/Samples/0_Introduction/simpleTextureDrv/simpleTextureDrv_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleTextureDrv.exe - compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -111,6 +111,6 @@ - + diff --git a/Samples/0_Introduction/simpleTextureDrv/simpleTextureDrv_vs2019.vcxproj 
b/Samples/0_Introduction/simpleTextureDrv/simpleTextureDrv_vs2019.vcxproj index 5817eb47..e1c43716 100644 --- a/Samples/0_Introduction/simpleTextureDrv/simpleTextureDrv_vs2019.vcxproj +++ b/Samples/0_Introduction/simpleTextureDrv/simpleTextureDrv_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleTextureDrv.exe - compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/simpleTextureDrv/simpleTextureDrv_vs2022.vcxproj b/Samples/0_Introduction/simpleTextureDrv/simpleTextureDrv_vs2022.vcxproj index 0cbde7a7..21cf1f35 100644 --- a/Samples/0_Introduction/simpleTextureDrv/simpleTextureDrv_vs2022.vcxproj +++ b/Samples/0_Introduction/simpleTextureDrv/simpleTextureDrv_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleTextureDrv.exe - compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/simpleVoteIntrinsics/Makefile b/Samples/0_Introduction/simpleVoteIntrinsics/Makefile index 82e01279..32edcf72 100644 --- a/Samples/0_Introduction/simpleVoteIntrinsics/Makefile +++ 
b/Samples/0_Introduction/simpleVoteIntrinsics/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/simpleVoteIntrinsics/NsightEclipse.xml b/Samples/0_Introduction/simpleVoteIntrinsics/NsightEclipse.xml index b424859f..e91b9714 100644 --- a/Samples/0_Introduction/simpleVoteIntrinsics/NsightEclipse.xml +++ b/Samples/0_Introduction/simpleVoteIntrinsics/NsightEclipse.xml @@ -3,10 +3,10 @@ simpleVoteIntrinsics + cudaMemcpy cudaFree cudaDeviceSynchronize cudaMalloc - cudaMemcpy cudaGetDeviceProperties @@ -46,6 +46,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/0_Introduction/simpleVoteIntrinsics/README.md b/Samples/0_Introduction/simpleVoteIntrinsics/README.md index e35dece3..9c86c635 100644 --- a/Samples/0_Introduction/simpleVoteIntrinsics/README.md +++ b/Samples/0_Introduction/simpleVoteIntrinsics/README.md @@ -10,7 +10,7 @@ Vote Intrinsics ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 
](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l, aarch64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaMemcpy, cudaGetDeviceProperties +cudaMemcpy, cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaGetDeviceProperties ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/0_Introduction/simpleVoteIntrinsics/simpleVoteIntrinsics_vs2017.vcxproj b/Samples/0_Introduction/simpleVoteIntrinsics/simpleVoteIntrinsics_vs2017.vcxproj index c49388da..d9045a58 100644 --- a/Samples/0_Introduction/simpleVoteIntrinsics/simpleVoteIntrinsics_vs2017.vcxproj +++ b/Samples/0_Introduction/simpleVoteIntrinsics/simpleVoteIntrinsics_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleVoteIntrinsics.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/simpleVoteIntrinsics/simpleVoteIntrinsics_vs2019.vcxproj b/Samples/0_Introduction/simpleVoteIntrinsics/simpleVoteIntrinsics_vs2019.vcxproj index 91da2af0..3bcc1472 100644 --- a/Samples/0_Introduction/simpleVoteIntrinsics/simpleVoteIntrinsics_vs2019.vcxproj +++ b/Samples/0_Introduction/simpleVoteIntrinsics/simpleVoteIntrinsics_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleVoteIntrinsics.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleVoteIntrinsics/simpleVoteIntrinsics_vs2022.vcxproj b/Samples/0_Introduction/simpleVoteIntrinsics/simpleVoteIntrinsics_vs2022.vcxproj index 
980c93b2..3af7fc24 100644 --- a/Samples/0_Introduction/simpleVoteIntrinsics/simpleVoteIntrinsics_vs2022.vcxproj +++ b/Samples/0_Introduction/simpleVoteIntrinsics/simpleVoteIntrinsics_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleVoteIntrinsics.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleVoteIntrinsics_nvrtc/README.md b/Samples/0_Introduction/simpleVoteIntrinsics_nvrtc/README.md index 28801cbd..2f4cdeb4 100644 --- a/Samples/0_Introduction/simpleVoteIntrinsics_nvrtc/README.md +++ b/Samples/0_Introduction/simpleVoteIntrinsics_nvrtc/README.md @@ -10,7 +10,7 @@ Vote Intrinsics, CUDA Driver API, Runtime Compilation ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 
](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64 ## CUDA APIs involved ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html) -cuModuleGetFunction, cuMemAlloc, cuLaunchKernel, cuCtxSynchronize, cuMemFree, cuMemcpyDtoH, cuMemcpyHtoD +cuMemcpyDtoH, cuLaunchKernel, cuMemcpyHtoD, cuCtxSynchronize, cuMemAlloc, cuMemFree, cuModuleGetFunction ## Dependencies needed to build/run [NVRTC](../../../README.md#nvrtc) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/0_Introduction/simpleVoteIntrinsics_nvrtc/simpleVoteIntrinsics_nvrtc_vs2017.vcxproj b/Samples/0_Introduction/simpleVoteIntrinsics_nvrtc/simpleVoteIntrinsics_nvrtc_vs2017.vcxproj index a0d07492..0d541d12 100644 --- a/Samples/0_Introduction/simpleVoteIntrinsics_nvrtc/simpleVoteIntrinsics_nvrtc_vs2017.vcxproj +++ b/Samples/0_Introduction/simpleVoteIntrinsics_nvrtc/simpleVoteIntrinsics_nvrtc_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/simpleVoteIntrinsics_nvrtc/simpleVoteIntrinsics_nvrtc_vs2019.vcxproj b/Samples/0_Introduction/simpleVoteIntrinsics_nvrtc/simpleVoteIntrinsics_nvrtc_vs2019.vcxproj index 00d7d275..2c334e59 100644 --- a/Samples/0_Introduction/simpleVoteIntrinsics_nvrtc/simpleVoteIntrinsics_nvrtc_vs2019.vcxproj +++ b/Samples/0_Introduction/simpleVoteIntrinsics_nvrtc/simpleVoteIntrinsics_nvrtc_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleVoteIntrinsics_nvrtc/simpleVoteIntrinsics_nvrtc_vs2022.vcxproj b/Samples/0_Introduction/simpleVoteIntrinsics_nvrtc/simpleVoteIntrinsics_nvrtc_vs2022.vcxproj index c8c5ff0b..69dbd968 100644 --- a/Samples/0_Introduction/simpleVoteIntrinsics_nvrtc/simpleVoteIntrinsics_nvrtc_vs2022.vcxproj +++ b/Samples/0_Introduction/simpleVoteIntrinsics_nvrtc/simpleVoteIntrinsics_nvrtc_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleZeroCopy/Makefile b/Samples/0_Introduction/simpleZeroCopy/Makefile index 8fe33e69..3161f90b 100644 --- a/Samples/0_Introduction/simpleZeroCopy/Makefile +++ b/Samples/0_Introduction/simpleZeroCopy/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git 
a/Samples/0_Introduction/simpleZeroCopy/NsightEclipse.xml b/Samples/0_Introduction/simpleZeroCopy/NsightEclipse.xml index 155731c7..a5776e59 100644 --- a/Samples/0_Introduction/simpleZeroCopy/NsightEclipse.xml +++ b/Samples/0_Introduction/simpleZeroCopy/NsightEclipse.xml @@ -4,15 +4,15 @@ simpleZeroCopy cudaHostAlloc - cudaGetDeviceCount + cudaSetDeviceFlags cudaHostRegister + cudaSetDevice + cudaGetDeviceCount + cudaHostGetDevicePointer cudaDeviceSynchronize cudaFreeHost - cudaHostUnregister - cudaSetDeviceFlags - cudaSetDevice - cudaHostGetDevicePointer cudaGetDeviceProperties + cudaHostUnregister whole @@ -51,6 +51,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/0_Introduction/simpleZeroCopy/README.md b/Samples/0_Introduction/simpleZeroCopy/README.md index 640a3096..a2f5acc8 100644 --- a/Samples/0_Introduction/simpleZeroCopy/README.md +++ b/Samples/0_Introduction/simpleZeroCopy/README.md @@ -10,7 +10,7 @@ Performance Strategies, Pinned System Paged Memory, Vector Addition ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 
](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaHostAlloc, cudaGetDeviceCount, cudaHostRegister, cudaDeviceSynchronize, cudaFreeHost, cudaHostUnregister, cudaSetDeviceFlags, cudaSetDevice, cudaHostGetDevicePointer, cudaGetDeviceProperties +cudaHostAlloc, cudaSetDeviceFlags, cudaHostRegister, cudaSetDevice, cudaGetDeviceCount, cudaHostGetDevicePointer, cudaDeviceSynchronize, cudaFreeHost, cudaGetDeviceProperties, cudaHostUnregister ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/0_Introduction/simpleZeroCopy/simpleZeroCopy_vs2017.vcxproj b/Samples/0_Introduction/simpleZeroCopy/simpleZeroCopy_vs2017.vcxproj index 46bb04d3..b7e0b9c0 100644 --- a/Samples/0_Introduction/simpleZeroCopy/simpleZeroCopy_vs2017.vcxproj +++ b/Samples/0_Introduction/simpleZeroCopy/simpleZeroCopy_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleZeroCopy.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/simpleZeroCopy/simpleZeroCopy_vs2019.vcxproj b/Samples/0_Introduction/simpleZeroCopy/simpleZeroCopy_vs2019.vcxproj index 17ea198b..c7a9daed 100644 --- a/Samples/0_Introduction/simpleZeroCopy/simpleZeroCopy_vs2019.vcxproj +++ b/Samples/0_Introduction/simpleZeroCopy/simpleZeroCopy_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleZeroCopy.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/simpleZeroCopy/simpleZeroCopy_vs2022.vcxproj b/Samples/0_Introduction/simpleZeroCopy/simpleZeroCopy_vs2022.vcxproj index 6f4d0c20..29709586 100644 --- a/Samples/0_Introduction/simpleZeroCopy/simpleZeroCopy_vs2022.vcxproj +++ 
b/Samples/0_Introduction/simpleZeroCopy/simpleZeroCopy_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleZeroCopy.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/systemWideAtomics/Makefile b/Samples/0_Introduction/systemWideAtomics/Makefile index e8019879..6832e615 100644 --- a/Samples/0_Introduction/systemWideAtomics/Makefile +++ b/Samples/0_Introduction/systemWideAtomics/Makefile @@ -303,9 +303,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 61 70 72 75 80 86 87 +SMS ?= 61 70 72 75 80 86 87 90 else -SMS ?= 60 61 70 75 80 86 +SMS ?= 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/systemWideAtomics/NsightEclipse.xml b/Samples/0_Introduction/systemWideAtomics/NsightEclipse.xml index f9099627..05284552 100644 --- a/Samples/0_Introduction/systemWideAtomics/NsightEclipse.xml +++ b/Samples/0_Introduction/systemWideAtomics/NsightEclipse.xml @@ -3,10 +3,10 @@ systemWideAtomics - cudaFree cudaDeviceSynchronize cudaMallocManaged cudaGetDeviceProperties + cudaFree whole @@ -44,6 +44,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/0_Introduction/systemWideAtomics/README.md b/Samples/0_Introduction/systemWideAtomics/README.md index c838fcd5..98f2a062 100644 --- a/Samples/0_Introduction/systemWideAtomics/README.md +++ b/Samples/0_Introduction/systemWideAtomics/README.md @@ -10,7 +10,7 @@ Atomic Intrinsics, Unified Memory ## Supported SM Architectures -[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 
](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaDeviceSynchronize, cudaMallocManaged, cudaGetDeviceProperties +cudaDeviceSynchronize, cudaMallocManaged, cudaGetDeviceProperties, cudaFree ## Dependencies needed to build/run [UVM](../../../README.md#uvm) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/0_Introduction/template/Makefile b/Samples/0_Introduction/template/Makefile index a462a637..47f37966 100644 --- a/Samples/0_Introduction/template/Makefile +++ b/Samples/0_Introduction/template/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/template/NsightEclipse.xml b/Samples/0_Introduction/template/NsightEclipse.xml index e043d389..21fe74fb 100644 --- a/Samples/0_Introduction/template/NsightEclipse.xml +++ b/Samples/0_Introduction/template/NsightEclipse.xml @@ -4,8 +4,8 @@ template cudaMalloc - cudaFree cudaMemcpy + cudaFree whole @@ -41,6 +41,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/0_Introduction/template/README.md b/Samples/0_Introduction/template/README.md index afcf51e1..f2244569 100644 --- a/Samples/0_Introduction/template/README.md +++ b/Samples/0_Introduction/template/README.md @@ -10,7 +10,7 @@ Device Memory Allocation ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 
](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMalloc, cudaFree, cudaMemcpy +cudaMalloc, cudaMemcpy, cudaFree ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/0_Introduction/template/template_vs2017.vcxproj b/Samples/0_Introduction/template/template_vs2017.vcxproj index 01694a19..5e436781 100644 --- a/Samples/0_Introduction/template/template_vs2017.vcxproj +++ b/Samples/0_Introduction/template/template_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/template.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -108,6 +108,6 @@ - + diff --git a/Samples/0_Introduction/template/template_vs2019.vcxproj b/Samples/0_Introduction/template/template_vs2019.vcxproj index 606bb330..f736fc28 100644 --- a/Samples/0_Introduction/template/template_vs2019.vcxproj +++ b/Samples/0_Introduction/template/template_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/template.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/0_Introduction/template/template_vs2022.vcxproj b/Samples/0_Introduction/template/template_vs2022.vcxproj index cc552ab4..6a6d8744 100644 --- a/Samples/0_Introduction/template/template_vs2022.vcxproj +++ b/Samples/0_Introduction/template/template_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/template.exe - 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/0_Introduction/vectorAdd/Makefile b/Samples/0_Introduction/vectorAdd/Makefile index 3ecf5e49..62be1498 100644 --- a/Samples/0_Introduction/vectorAdd/Makefile +++ b/Samples/0_Introduction/vectorAdd/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/0_Introduction/vectorAdd/NsightEclipse.xml b/Samples/0_Introduction/vectorAdd/NsightEclipse.xml index 0df66051..353acee5 100644 --- a/Samples/0_Introduction/vectorAdd/NsightEclipse.xml +++ b/Samples/0_Introduction/vectorAdd/NsightEclipse.xml @@ -3,11 +3,11 @@ vectorAdd - cudaFree - cudaMalloc - cudaGetLastError cudaMemcpy cudaGetErrorString + cudaFree + cudaGetLastError + cudaMalloc whole @@ -46,6 +46,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/0_Introduction/vectorAdd/README.md b/Samples/0_Introduction/vectorAdd/README.md index 8a7e4464..99523169 100644 --- a/Samples/0_Introduction/vectorAdd/README.md +++ b/Samples/0_Introduction/vectorAdd/README.md @@ -10,7 +10,7 @@ CUDA Runtime API, Vector Addition ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 
](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaMalloc, cudaGetLastError, cudaMemcpy, cudaGetErrorString +cudaMemcpy, cudaGetErrorString, cudaFree, cudaGetLastError, cudaMalloc ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/0_Introduction/vectorAdd/vectorAdd_vs2017.vcxproj b/Samples/0_Introduction/vectorAdd/vectorAdd_vs2017.vcxproj index 6529e270..e8af3149 100644 --- a/Samples/0_Introduction/vectorAdd/vectorAdd_vs2017.vcxproj +++ b/Samples/0_Introduction/vectorAdd/vectorAdd_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/vectorAdd.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/vectorAdd/vectorAdd_vs2019.vcxproj b/Samples/0_Introduction/vectorAdd/vectorAdd_vs2019.vcxproj index 295f1945..a25492e3 100644 --- a/Samples/0_Introduction/vectorAdd/vectorAdd_vs2019.vcxproj +++ b/Samples/0_Introduction/vectorAdd/vectorAdd_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/vectorAdd.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/vectorAdd/vectorAdd_vs2022.vcxproj b/Samples/0_Introduction/vectorAdd/vectorAdd_vs2022.vcxproj index 4da53146..c983b88b 100644 --- a/Samples/0_Introduction/vectorAdd/vectorAdd_vs2022.vcxproj +++ b/Samples/0_Introduction/vectorAdd/vectorAdd_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/vectorAdd.exe - 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/vectorAddDrv/Makefile b/Samples/0_Introduction/vectorAddDrv/Makefile index c21fa942..472417b1 100644 --- a/Samples/0_Introduction/vectorAddDrv/Makefile +++ b/Samples/0_Introduction/vectorAddDrv/Makefile @@ -283,9 +283,9 @@ FATBIN_FILE := vectorAdd_kernel${TARGET_SIZE}.fatbin # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(GENCODE_FLAGS),) diff --git a/Samples/0_Introduction/vectorAddDrv/README.md b/Samples/0_Introduction/vectorAddDrv/README.md index 09612d23..ac26085a 100644 --- a/Samples/0_Introduction/vectorAddDrv/README.md +++ b/Samples/0_Introduction/vectorAddDrv/README.md @@ -10,7 +10,7 @@ CUDA Driver API, Vector Addition ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) 
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html) -cuModuleGetFunction, cuModuleLoadData, cuCtxCreate, cuLaunchKernel, cuMemAlloc, cuMemcpyDtoH, cuCtxSynchronize, cuMemFree, cuInit, cuCtxDestroy, cuMemcpyHtoD +cuMemcpyDtoH, cuLaunchKernel, cuMemcpyHtoD, cuModuleLoadData, cuCtxSynchronize, cuMemAlloc, cuMemFree, cuCtxDestroy, cuModuleGetFunction, cuCtxCreate, cuInit ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/0_Introduction/vectorAddDrv/vectorAddDrv_vs2017.vcxproj b/Samples/0_Introduction/vectorAddDrv/vectorAddDrv_vs2017.vcxproj index bfd45966..57b06693 100644 --- a/Samples/0_Introduction/vectorAddDrv/vectorAddDrv_vs2017.vcxproj +++ b/Samples/0_Introduction/vectorAddDrv/vectorAddDrv_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/vectorAddDrv.exe - compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -111,6 +111,6 @@ - + diff --git a/Samples/0_Introduction/vectorAddDrv/vectorAddDrv_vs2019.vcxproj b/Samples/0_Introduction/vectorAddDrv/vectorAddDrv_vs2019.vcxproj index d1119c07..c48b9e14 100644 --- a/Samples/0_Introduction/vectorAddDrv/vectorAddDrv_vs2019.vcxproj +++ b/Samples/0_Introduction/vectorAddDrv/vectorAddDrv_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/vectorAddDrv.exe - compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/vectorAddDrv/vectorAddDrv_vs2022.vcxproj b/Samples/0_Introduction/vectorAddDrv/vectorAddDrv_vs2022.vcxproj index 49a0a5fb..4f04109b 100644 --- 
a/Samples/0_Introduction/vectorAddDrv/vectorAddDrv_vs2022.vcxproj +++ b/Samples/0_Introduction/vectorAddDrv/vectorAddDrv_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/vectorAddDrv.exe - compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/vectorAddMMAP/Makefile b/Samples/0_Introduction/vectorAddMMAP/Makefile index 72b2eaa4..743d2bdd 100644 --- a/Samples/0_Introduction/vectorAddMMAP/Makefile +++ b/Samples/0_Introduction/vectorAddMMAP/Makefile @@ -307,9 +307,9 @@ FATBIN_FILE := vectorAdd_kernel${TARGET_SIZE}.fatbin # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(GENCODE_FLAGS),) diff --git a/Samples/0_Introduction/vectorAddMMAP/README.md b/Samples/0_Introduction/vectorAddMMAP/README.md index 786e136f..6dbbcae7 100644 --- a/Samples/0_Introduction/vectorAddMMAP/README.md +++ b/Samples/0_Introduction/vectorAddMMAP/README.md @@ -10,7 +10,7 @@ CUDA Driver API, Vector Addition, MMAP ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 
](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le ## CUDA APIs involved ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html) -cuMemSetAccess, cuInit, cuMemAddressReserve, cuModuleGetFunction, cuCtxDestroy, cuCtxCreate, cuMemAddressFree, cuMemGetAllocationGranularity, cuMemUnmap, cuDeviceGetAttribute, cuMemRelease, cuModuleLoadData, cuMemMap, cuMemCreate, cuMemcpyHtoD, cuDeviceCanAccessPeer, cuDeviceGetCount, cuLaunchKernel, cuMemcpyDtoH +cuMemcpyDtoH, cuDeviceCanAccessPeer, cuModuleGetFunction, cuMemSetAccess, cuMemRelease, cuInit, cuMemcpyHtoD, cuLaunchKernel, cuMemCreate, cuModuleLoadData, cuCtxDestroy, cuDeviceGetCount, cuMemMap, cuDeviceGetAttribute, cuMemGetAllocationGranularity, cuMemAddressFree, cuMemUnmap, cuCtxCreate, cuMemAddressReserve ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/0_Introduction/vectorAddMMAP/vectorAddMMAP_vs2017.vcxproj b/Samples/0_Introduction/vectorAddMMAP/vectorAddMMAP_vs2017.vcxproj index 6aa6aa9e..00641d9b 100644 --- a/Samples/0_Introduction/vectorAddMMAP/vectorAddMMAP_vs2017.vcxproj +++ b/Samples/0_Introduction/vectorAddMMAP/vectorAddMMAP_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/vectorAddMMAP.exe - compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -112,6 +112,6 @@ - + diff --git a/Samples/0_Introduction/vectorAddMMAP/vectorAddMMAP_vs2019.vcxproj b/Samples/0_Introduction/vectorAddMMAP/vectorAddMMAP_vs2019.vcxproj index ece800ca..a4885080 100644 --- a/Samples/0_Introduction/vectorAddMMAP/vectorAddMMAP_vs2019.vcxproj +++ b/Samples/0_Introduction/vectorAddMMAP/vectorAddMMAP_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/vectorAddMMAP.exe - compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -108,6 +108,6 @@ - + diff --git a/Samples/0_Introduction/vectorAddMMAP/vectorAddMMAP_vs2022.vcxproj 
b/Samples/0_Introduction/vectorAddMMAP/vectorAddMMAP_vs2022.vcxproj index 81fe95b6..59ad3c82 100644 --- a/Samples/0_Introduction/vectorAddMMAP/vectorAddMMAP_vs2022.vcxproj +++ b/Samples/0_Introduction/vectorAddMMAP/vectorAddMMAP_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/vectorAddMMAP.exe - compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -108,6 +108,6 @@ - + diff --git a/Samples/0_Introduction/vectorAdd_nvrtc/README.md b/Samples/0_Introduction/vectorAdd_nvrtc/README.md index 5e83b512..03ed2a74 100644 --- a/Samples/0_Introduction/vectorAdd_nvrtc/README.md +++ b/Samples/0_Introduction/vectorAdd_nvrtc/README.md @@ -10,7 +10,7 @@ CUDA Driver API, Vector Addition, Runtime Compilation ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 
](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,7 +23,7 @@ x86_64, ppc64le, aarch64 ## CUDA APIs involved ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html) -cuModuleGetFunction, cuMemAlloc, cuLaunchKernel, cuCtxSynchronize, cuMemFree, cuMemcpyDtoH, cuMemcpyHtoD +cuMemcpyDtoH, cuLaunchKernel, cuMemcpyHtoD, cuCtxSynchronize, cuMemAlloc, cuMemFree, cuModuleGetFunction ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) cudaBlockSize, cudaGridSize @@ -33,7 +33,7 @@ cudaBlockSize, cudaGridSize ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/0_Introduction/vectorAdd_nvrtc/vectorAdd_nvrtc_vs2017.vcxproj b/Samples/0_Introduction/vectorAdd_nvrtc/vectorAdd_nvrtc_vs2017.vcxproj index c8331245..1ad04a6d 100644 --- a/Samples/0_Introduction/vectorAdd_nvrtc/vectorAdd_nvrtc_vs2017.vcxproj +++ b/Samples/0_Introduction/vectorAdd_nvrtc/vectorAdd_nvrtc_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -107,6 +107,6 @@ - + diff --git a/Samples/0_Introduction/vectorAdd_nvrtc/vectorAdd_nvrtc_vs2019.vcxproj b/Samples/0_Introduction/vectorAdd_nvrtc/vectorAdd_nvrtc_vs2019.vcxproj index 27cd03de..e140f5eb 100644 --- a/Samples/0_Introduction/vectorAdd_nvrtc/vectorAdd_nvrtc_vs2019.vcxproj +++ b/Samples/0_Introduction/vectorAdd_nvrtc/vectorAdd_nvrtc_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -103,6 +103,6 @@ - + diff --git a/Samples/0_Introduction/vectorAdd_nvrtc/vectorAdd_nvrtc_vs2022.vcxproj b/Samples/0_Introduction/vectorAdd_nvrtc/vectorAdd_nvrtc_vs2022.vcxproj index db61b475..a575fb82 100644 --- a/Samples/0_Introduction/vectorAdd_nvrtc/vectorAdd_nvrtc_vs2022.vcxproj +++ b/Samples/0_Introduction/vectorAdd_nvrtc/vectorAdd_nvrtc_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -103,6 +103,6 @@ - + diff --git a/Samples/1_Utilities/bandwidthTest/Makefile b/Samples/1_Utilities/bandwidthTest/Makefile index 31f60de5..8699a8bc 100644 --- a/Samples/1_Utilities/bandwidthTest/Makefile +++ b/Samples/1_Utilities/bandwidthTest/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/1_Utilities/bandwidthTest/NsightEclipse.xml b/Samples/1_Utilities/bandwidthTest/NsightEclipse.xml index 840ab273..6078a765 100644 --- a/Samples/1_Utilities/bandwidthTest/NsightEclipse.xml +++ b/Samples/1_Utilities/bandwidthTest/NsightEclipse.xml @@ -3,22 +3,22 @@ bandwidthTest - 
cudaFree - cudaEventRecord - cudaMallocHost cudaHostAlloc - cudaEventCreate - cudaGetDeviceCount - cudaEventElapsedTime - cudaDeviceSynchronize - cudaFreeHost - cudaMalloc - cudaEventDestroy - cudaSetDevice - cudaMemcpyAsync cudaMemcpy + cudaMalloc + cudaMemcpyAsync + cudaFree cudaGetErrorString + cudaMallocHost + cudaSetDevice cudaGetDeviceProperties + cudaDeviceSynchronize + cudaEventRecord + cudaFreeHost + cudaEventDestroy + cudaEventElapsedTime + cudaGetDeviceCount + cudaEventCreate whole @@ -58,6 +58,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/1_Utilities/bandwidthTest/README.md b/Samples/1_Utilities/bandwidthTest/README.md index 32e4f779..2cf0bad7 100644 --- a/Samples/1_Utilities/bandwidthTest/README.md +++ b/Samples/1_Utilities/bandwidthTest/README.md @@ -10,7 +10,7 @@ CUDA Streams and Events, Performance Strategies ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 
](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaEventRecord, cudaMallocHost, cudaHostAlloc, cudaEventCreate, cudaGetDeviceCount, cudaEventElapsedTime, cudaDeviceSynchronize, cudaFreeHost, cudaMalloc, cudaEventDestroy, cudaSetDevice, cudaMemcpyAsync, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties +cudaHostAlloc, cudaMemcpy, cudaMalloc, cudaMemcpyAsync, cudaFree, cudaGetErrorString, cudaMallocHost, cudaSetDevice, cudaGetDeviceProperties, cudaDeviceSynchronize, cudaEventRecord, cudaFreeHost, cudaEventDestroy, cudaEventElapsedTime, cudaGetDeviceCount, cudaEventCreate ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/1_Utilities/bandwidthTest/bandwidthTest_vs2017.vcxproj b/Samples/1_Utilities/bandwidthTest/bandwidthTest_vs2017.vcxproj index 38f1b77f..ad862bff 100644 --- a/Samples/1_Utilities/bandwidthTest/bandwidthTest_vs2017.vcxproj +++ b/Samples/1_Utilities/bandwidthTest/bandwidthTest_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/bandwidthTest.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/1_Utilities/bandwidthTest/bandwidthTest_vs2019.vcxproj b/Samples/1_Utilities/bandwidthTest/bandwidthTest_vs2019.vcxproj index 59300fc1..e6f5f304 100644 --- a/Samples/1_Utilities/bandwidthTest/bandwidthTest_vs2019.vcxproj +++ b/Samples/1_Utilities/bandwidthTest/bandwidthTest_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/bandwidthTest.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/1_Utilities/bandwidthTest/bandwidthTest_vs2022.vcxproj b/Samples/1_Utilities/bandwidthTest/bandwidthTest_vs2022.vcxproj index 9faf45d5..676302bd 100644 --- a/Samples/1_Utilities/bandwidthTest/bandwidthTest_vs2022.vcxproj +++ b/Samples/1_Utilities/bandwidthTest/bandwidthTest_vs2022.vcxproj @@ -34,7 +34,7 @@ - + 
@@ -63,7 +63,7 @@ $(OutDir)/bandwidthTest.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/1_Utilities/deviceQuery/Makefile b/Samples/1_Utilities/deviceQuery/Makefile index ea0d25c1..44dd2fbc 100644 --- a/Samples/1_Utilities/deviceQuery/Makefile +++ b/Samples/1_Utilities/deviceQuery/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/1_Utilities/deviceQuery/NsightEclipse.xml b/Samples/1_Utilities/deviceQuery/NsightEclipse.xml index 842dea35..dda30eb7 100644 --- a/Samples/1_Utilities/deviceQuery/NsightEclipse.xml +++ b/Samples/1_Utilities/deviceQuery/NsightEclipse.xml @@ -5,12 +5,12 @@ cuDeviceGetAttribute cuSafeCallNoSync - cudaGetDeviceCount - cudaDriverGetVersion - cudaDeviceCanAccessPeer - cudaSetDevice cudaRuntimeGetVersion cudaGetErrorString + cudaDeviceCanAccessPeer + cudaSetDevice + cudaGetDeviceCount + cudaDriverGetVersion cudaGetDeviceProperties @@ -48,6 +48,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/1_Utilities/deviceQuery/README.md b/Samples/1_Utilities/deviceQuery/README.md index 794c5f76..4f4a647d 100644 --- a/Samples/1_Utilities/deviceQuery/README.md +++ b/Samples/1_Utilities/deviceQuery/README.md @@ -10,7 +10,7 @@ CUDA Runtime API, Device Query ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 
](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -26,11 +26,11 @@ x86_64, ppc64le, armv7l, aarch64 cuDeviceGetAttribute, cuSafeCallNoSync ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaGetDeviceCount, cudaDriverGetVersion, cudaDeviceCanAccessPeer, cudaSetDevice, cudaRuntimeGetVersion, cudaGetErrorString, cudaGetDeviceProperties +cudaRuntimeGetVersion, cudaGetErrorString, cudaDeviceCanAccessPeer, cudaSetDevice, cudaGetDeviceCount, cudaDriverGetVersion, cudaGetDeviceProperties ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/1_Utilities/deviceQuery/deviceQuery_vs2017.vcxproj b/Samples/1_Utilities/deviceQuery/deviceQuery_vs2017.vcxproj index e52b7e5f..87cca12f 100644 --- a/Samples/1_Utilities/deviceQuery/deviceQuery_vs2017.vcxproj +++ b/Samples/1_Utilities/deviceQuery/deviceQuery_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/deviceQuery.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/1_Utilities/deviceQuery/deviceQuery_vs2019.vcxproj b/Samples/1_Utilities/deviceQuery/deviceQuery_vs2019.vcxproj index 3b4b1f75..41b5bebd 100644 --- a/Samples/1_Utilities/deviceQuery/deviceQuery_vs2019.vcxproj +++ b/Samples/1_Utilities/deviceQuery/deviceQuery_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/deviceQuery.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/1_Utilities/deviceQuery/deviceQuery_vs2022.vcxproj b/Samples/1_Utilities/deviceQuery/deviceQuery_vs2022.vcxproj index 4d9684d9..4ba036e5 100644 --- a/Samples/1_Utilities/deviceQuery/deviceQuery_vs2022.vcxproj +++ 
b/Samples/1_Utilities/deviceQuery/deviceQuery_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/deviceQuery.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/1_Utilities/deviceQueryDrv/NsightEclipse.xml b/Samples/1_Utilities/deviceQueryDrv/NsightEclipse.xml index 999dc70e..7a56e697 100644 --- a/Samples/1_Utilities/deviceQueryDrv/NsightEclipse.xml +++ b/Samples/1_Utilities/deviceQueryDrv/NsightEclipse.xml @@ -3,13 +3,14 @@ deviceQueryDrv - cuDeviceCanAccessPeer - cuDriverGetVersion - cuDeviceGetCount cuDeviceGetName - cuDeviceTotalMem - cuInit cuDeviceGetAttribute + cuDeviceTotalMem + cuDeviceCanAccessPeer + cuDeviceGetCount + cuDriverGetVersion + cuInit + cudaSetDevice whole @@ -49,6 +50,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/1_Utilities/deviceQueryDrv/README.md b/Samples/1_Utilities/deviceQueryDrv/README.md index 5d80066c..92d02352 100644 --- a/Samples/1_Utilities/deviceQueryDrv/README.md +++ b/Samples/1_Utilities/deviceQueryDrv/README.md @@ -10,7 +10,7 @@ CUDA Driver API, Device Query ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 
](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,14 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html) -cuDeviceCanAccessPeer, cuDriverGetVersion, cuDeviceGetCount, cuDeviceGetName, cuDeviceTotalMem, cuInit, cuDeviceGetAttribute +cuDeviceGetName, cuDeviceGetAttribute, cuDeviceTotalMem, cuDeviceCanAccessPeer, cuDeviceGetCount, cuDriverGetVersion, cuInit + +### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) +cudaSetDevice ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/1_Utilities/deviceQueryDrv/deviceQueryDrv_vs2017.vcxproj b/Samples/1_Utilities/deviceQueryDrv/deviceQueryDrv_vs2017.vcxproj index a94caee2..59f77b9b 100644 --- a/Samples/1_Utilities/deviceQueryDrv/deviceQueryDrv_vs2017.vcxproj +++ b/Samples/1_Utilities/deviceQueryDrv/deviceQueryDrv_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -107,6 +107,6 @@ - + diff --git a/Samples/1_Utilities/deviceQueryDrv/deviceQueryDrv_vs2019.vcxproj b/Samples/1_Utilities/deviceQueryDrv/deviceQueryDrv_vs2019.vcxproj index 282fef95..629a2e6e 100644 --- a/Samples/1_Utilities/deviceQueryDrv/deviceQueryDrv_vs2019.vcxproj +++ b/Samples/1_Utilities/deviceQueryDrv/deviceQueryDrv_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -103,6 +103,6 @@ - + diff --git a/Samples/1_Utilities/deviceQueryDrv/deviceQueryDrv_vs2022.vcxproj b/Samples/1_Utilities/deviceQueryDrv/deviceQueryDrv_vs2022.vcxproj index 30aaeef4..5c8aab7e 100644 --- a/Samples/1_Utilities/deviceQueryDrv/deviceQueryDrv_vs2022.vcxproj +++ b/Samples/1_Utilities/deviceQueryDrv/deviceQueryDrv_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -103,6 +103,6 @@ - + diff --git a/Samples/1_Utilities/topologyQuery/Makefile b/Samples/1_Utilities/topologyQuery/Makefile index ea562174..9a48838c 100644 --- a/Samples/1_Utilities/topologyQuery/Makefile +++ b/Samples/1_Utilities/topologyQuery/Makefile @@ -297,9 +297,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/1_Utilities/topologyQuery/NsightEclipse.xml b/Samples/1_Utilities/topologyQuery/NsightEclipse.xml index 722261a6..8bfd757d 100644 --- a/Samples/1_Utilities/topologyQuery/NsightEclipse.xml +++ b/Samples/1_Utilities/topologyQuery/NsightEclipse.xml @@ -46,6 +46,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git 
a/Samples/1_Utilities/topologyQuery/README.md b/Samples/1_Utilities/topologyQuery/README.md index 4f4093fa..e08fa339 100644 --- a/Samples/1_Utilities/topologyQuery/README.md +++ b/Samples/1_Utilities/topologyQuery/README.md @@ -10,7 +10,7 @@ Performance Strategies, Multi-GPU ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -27,7 +27,7 @@ cudaGetDeviceCount, cudaDeviceGetAttribute ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/1_Utilities/topologyQuery/topologyQuery_vs2017.vcxproj b/Samples/1_Utilities/topologyQuery/topologyQuery_vs2017.vcxproj index df094c22..feecd32f 100644 --- a/Samples/1_Utilities/topologyQuery/topologyQuery_vs2017.vcxproj +++ b/Samples/1_Utilities/topologyQuery/topologyQuery_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/topologyQuery.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/1_Utilities/topologyQuery/topologyQuery_vs2019.vcxproj b/Samples/1_Utilities/topologyQuery/topologyQuery_vs2019.vcxproj index 62f21c12..245f929b 100644 --- a/Samples/1_Utilities/topologyQuery/topologyQuery_vs2019.vcxproj +++ b/Samples/1_Utilities/topologyQuery/topologyQuery_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/topologyQuery.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/1_Utilities/topologyQuery/topologyQuery_vs2022.vcxproj b/Samples/1_Utilities/topologyQuery/topologyQuery_vs2022.vcxproj index 40ba0ca7..1d81b933 100644 --- 
a/Samples/1_Utilities/topologyQuery/topologyQuery_vs2022.vcxproj +++ b/Samples/1_Utilities/topologyQuery/topologyQuery_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/topologyQuery.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/Makefile b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/Makefile index 0200b235..dabf0516 100644 --- a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/Makefile +++ b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/Makefile @@ -301,9 +301,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/NsightEclipse.xml index 0ab4b349..6df81e06 100644 --- a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/NsightEclipse.xml +++ b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/NsightEclipse.xml @@ -3,42 +3,42 @@ EGLStream_CUDA_CrossGPU - cuEGLStreamConsumerConnect - cuMemFree - cuInit - cuStreamCreate - cuCtxCreate - cuGraphicsResourceGetMappedEglFrame cuDeviceGetName - cuCtxSynchronize - cuEGLStreamConsumerAcquireFrame - cuDeviceGet - cuDeviceGetAttribute - cuMemAlloc cuEGLStreamConsumerReleaseFrame + cuEGLStreamConsumerConnect + cuEGLStreamConsumerDisconnect + 
cuCtxPushCurrent + cuEGLStreamProducerReturnFrame + cuStreamCreate + cuEGLStreamProducerPresentFrame + cuMemFree + cuGraphicsResourceGetMappedEglFrame + cuInit + cuMemcpyHtoD + cuDeviceGet + cuEGLStreamConsumerAcquireFrame cuEGLStreamProducerDisconnect cuEGLStreamProducerConnect - cuEGLStreamConsumerDisconnect - cuMemcpyHtoD - cuEGLStreamProducerReturnFrame - cuCtxPushCurrent + cuDeviceGetAttribute + cuCtxSynchronize + cuMemAlloc cuCtxPopCurrent - cuEGLStreamProducerPresentFrame - cudaDeviceCreateConsumer - cudaFree - cudaConsumerReleaseFrame - cudaDeviceSynchronize - cudaGetValueMismatch - cudaProducerDeinit - cudaProducerPresentFrame - cudaMalloc - cudaProducerInit - cudaProducerReturnFrame - cudaProducerPrepareFrame - cudaConsumerAcquireFrame + cuCtxCreate cudaMemcpy + cudaMalloc + cudaProducerPresentFrame + cudaFree cudaGetErrorString + cudaConsumerReleaseFrame + cudaProducerReturnFrame + cudaDeviceSynchronize cudaDeviceCreateProducer + cudaProducerDeinit + cudaProducerPrepareFrame + cudaGetValueMismatch + cudaConsumerAcquireFrame + cudaProducerInit + cudaDeviceCreateConsumer whole @@ -81,6 +81,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/README.md b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/README.md index 2178db66..b559583b 100644 --- a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/README.md +++ b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/README.md @@ -10,7 +10,7 @@ EGLStreams Interop ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) 
[SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,17 +23,17 @@ x86_64, armv7l ## CUDA APIs involved ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html) -cuEGLStreamConsumerConnect, cuMemFree, cuInit, cuStreamCreate, cuCtxCreate, cuGraphicsResourceGetMappedEglFrame, cuDeviceGetName, cuCtxSynchronize, cuEGLStreamConsumerAcquireFrame, cuDeviceGet, cuDeviceGetAttribute, cuMemAlloc, cuEGLStreamConsumerReleaseFrame, cuEGLStreamProducerDisconnect, cuEGLStreamProducerConnect, cuEGLStreamConsumerDisconnect, cuMemcpyHtoD, cuEGLStreamProducerReturnFrame, cuCtxPushCurrent, cuCtxPopCurrent, cuEGLStreamProducerPresentFrame +cuDeviceGetName, cuEGLStreamConsumerReleaseFrame, cuEGLStreamConsumerConnect, cuEGLStreamConsumerDisconnect, cuCtxPushCurrent, cuEGLStreamProducerReturnFrame, cuStreamCreate, cuEGLStreamProducerPresentFrame, cuMemFree, cuGraphicsResourceGetMappedEglFrame, cuInit, cuMemcpyHtoD, cuDeviceGet, cuEGLStreamConsumerAcquireFrame, cuEGLStreamProducerDisconnect, cuEGLStreamProducerConnect, cuDeviceGetAttribute, cuCtxSynchronize, cuMemAlloc, cuCtxPopCurrent, cuCtxCreate ### [CUDA 
Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaDeviceCreateConsumer, cudaFree, cudaConsumerReleaseFrame, cudaDeviceSynchronize, cudaGetValueMismatch, cudaProducerDeinit, cudaProducerPresentFrame, cudaMalloc, cudaProducerInit, cudaProducerReturnFrame, cudaProducerPrepareFrame, cudaConsumerAcquireFrame, cudaMemcpy, cudaGetErrorString, cudaDeviceCreateProducer +cudaMemcpy, cudaMalloc, cudaProducerPresentFrame, cudaFree, cudaGetErrorString, cudaConsumerReleaseFrame, cudaProducerReturnFrame, cudaDeviceSynchronize, cudaDeviceCreateProducer, cudaProducerDeinit, cudaProducerPrepareFrame, cudaGetValueMismatch, cudaConsumerAcquireFrame, cudaProducerInit, cudaDeviceCreateConsumer ## Dependencies needed to build/run [EGL](../../../README.md#egl) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/findegl.mk b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/findegl.mk index cfeee899..33ec1a96 100644 --- a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/findegl.mk +++ b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/findegl.mk @@ -58,6 +58,7 @@ ifeq ("$(TARGET_OS)","linux") RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) 
ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -84,22 +85,12 @@ ifeq ("$(TARGET_OS)","linux") DFLT_PATH ?= /usr/lib endif endif - ifeq ("$(SUSE)","0") + + ifeq ("$(SUSE)","0") GLPATH ?= /usr/X11R6/lib64 GLLINK ?= -L/usr/X11R6/lib64 DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(FEDORA)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(RHEL)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(CENTOS)","0") + else GLPATH ?= /usr/lib64/nvidia GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/NsightEclipse.xml index d84d9313..ce22364a 100644 --- a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/NsightEclipse.xml +++ b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/NsightEclipse.xml @@ -3,23 +3,36 @@ EGLStream_CUDA_Interop - cuDeviceGet - cuDeviceGetAttribute - cuDeviceComputeCapability - cuDeviceGetCount + cuMemcpyDtoH cuDeviceGetName - cuGraphicsResourceGetMappedEglFrame - cuEGLStreamConsumerAcquireFrame cuEGLStreamConsumerReleaseFrame - cuEGLStreamProducerPresentFrame - cuCtxCreate - cuMemAlloc - cuMemFree - cuMemcpy3D - cuStreamCreate + cuEGLStreamConsumerConnect + cuEGLStreamConsumerDisconnect cuCtxPushCurrent + cuArrayDestroy + cuEGLStreamProducerReturnFrame + cuEGLStreamProducerPresentFrame + cuMemFree + cuGraphicsResourceGetMappedEglFrame + cuInit + cuEGLStreamConsumerAcquireFrame + cuEGLStreamProducerDisconnect + cuDeviceGetCount + cuEGLStreamProducerConnect + cuDeviceGetAttribute + cuCtxSynchronize + cuMemAlloc cuCtxPopCurrent - cuCtxDestroy + cuCtxCreate + cuMemcpy + cudaProducerReadYUVFrame + cudaProducerTest + cudaProducerDeinit + cudaDeviceCreateProducer + cudaProducerReadARGBFrame + 
cudaDeviceCreateConsumer + cudaConsumerTest + cudaProducerInit whole @@ -53,6 +66,7 @@ sm37 sm50 sm52 + sm53 sm60 sm61 sm70 @@ -60,6 +74,8 @@ sm75 sm80 sm86 + sm87 + sm90 x86_64 diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/README.md b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/README.md index 3ccef85d..5cee12b0 100644 --- a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/README.md +++ b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/README.md @@ -10,7 +10,7 @@ EGLStreams Interop ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,17 +23,17 @@ x86_64, aarch64 ## CUDA APIs involved 
### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html) -cuEGLStreamConsumerConnect, cuArrayDestroy, cuMemFree, cuInit, cuCtxCreate, cuGraphicsResourceGetMappedEglFrame, cuDeviceGetName, cuCtxSynchronize, cuEGLStreamConsumerAcquireFrame, cuDeviceGetAttribute, cuMemcpy, cuMemAlloc, cuEGLStreamConsumerReleaseFrame, cuEGLStreamProducerDisconnect, cuEGLStreamProducerConnect, cuEGLStreamConsumerDisconnect, cuDeviceGetCount, cuEGLStreamProducerReturnFrame, cuCtxPushCurrent, cuCtxPopCurrent, cuMemcpyDtoH, cuEGLStreamProducerPresentFrame +cuMemcpyDtoH, cuDeviceGetName, cuEGLStreamConsumerReleaseFrame, cuEGLStreamConsumerConnect, cuEGLStreamConsumerDisconnect, cuCtxPushCurrent, cuArrayDestroy, cuEGLStreamProducerReturnFrame, cuEGLStreamProducerPresentFrame, cuMemFree, cuGraphicsResourceGetMappedEglFrame, cuInit, cuEGLStreamConsumerAcquireFrame, cuEGLStreamProducerDisconnect, cuDeviceGetCount, cuEGLStreamProducerConnect, cuDeviceGetAttribute, cuCtxSynchronize, cuMemAlloc, cuCtxPopCurrent, cuCtxCreate, cuMemcpy ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaDeviceCreateConsumer, cudaConsumerTest, cudaProducerDeinit, cudaProducerInit, cudaProducerReadYUVFrame, cudaProducerTest, cudaProducerReadARGBFrame, cudaDeviceCreateProducer +cudaProducerReadYUVFrame, cudaProducerTest, cudaProducerDeinit, cudaDeviceCreateProducer, cudaProducerReadARGBFrame, cudaDeviceCreateConsumer, cudaConsumerTest, cudaProducerInit ## Dependencies needed to build/run [EGL](../../../README.md#egl) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/cuda_producer.cpp b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/cuda_producer.cpp index f15c5cd1..ef3adab2 100644 --- a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/cuda_producer.cpp +++ b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/cuda_producer.cpp @@ -301,7 +301,7 @@ CUresult cudaDeviceCreateProducer(test_cuda_producer_s *cudaProducer, if (major < 6) { printf( - "EGLStreams_CUDA_Interop requires SM 6.0 or higher arch GPU. " + "EGLStream_CUDA_Interop requires SM 6.0 or higher arch GPU. " "Exiting...\n"); exit(2); // EXIT_WAIVED } diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/findegl.mk b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/findegl.mk index cfeee899..33ec1a96 100644 --- a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/findegl.mk +++ b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/findegl.mk @@ -58,6 +58,7 @@ ifeq ("$(TARGET_OS)","linux") RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) 
ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -84,22 +85,12 @@ ifeq ("$(TARGET_OS)","linux") DFLT_PATH ?= /usr/lib endif endif - ifeq ("$(SUSE)","0") + + ifeq ("$(SUSE)","0") GLPATH ?= /usr/X11R6/lib64 GLLINK ?= -L/usr/X11R6/lib64 DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(FEDORA)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(RHEL)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(CENTOS)","0") + else GLPATH ?= /usr/lib64/nvidia GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 diff --git a/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/Makefile b/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/Makefile index 9a670c78..86cfb928 100644 --- a/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/Makefile +++ b/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/Makefile @@ -321,9 +321,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/NsightEclipse.xml index fbb91f1f..63995525 100644 --- a/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/NsightEclipse.xml +++ b/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/NsightEclipse.xml @@ -3,27 +3,27 @@ EGLSync_CUDAEvent_Interop - cuGraphicsEGLRegisterImage - cuStreamCreate - cuEventCreate - cuCtxCreate - cuGraphicsSubResourceGetMappedArray - cuGraphicsUnregisterResource - cuCtxSynchronize - cuEventCreateFromEGLSync - cuEventDestroy - cuStreamWaitEvent - cuCtxPushCurrent - cuSurfObjectCreate - cuInit 
cuEventRecord cuDeviceGetAttribute + cuEventCreate + cuCtxSynchronize + cuEventDestroy + cuGraphicsEGLRegisterImage + cuGraphicsSubResourceGetMappedArray + cuStreamCreate + cuStreamWaitEvent + cuGraphicsUnregisterResource + cuCtxCreate + cuSurfObjectCreate + cuEventCreateFromEGLSync + cuCtxPushCurrent + cuInit + cudaMemcpy + cudaGetErrorString cudaFree cudaDeviceSynchronize cudaGetValueMismatch cudaMalloc - cudaMemcpy - cudaGetErrorString whole @@ -72,6 +72,7 @@ sm80 sm86 sm87 + sm90 arm diff --git a/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/README.md b/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/README.md index 655881d1..8c980b52 100644 --- a/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/README.md +++ b/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/README.md @@ -10,7 +10,7 @@ EGLSync-CUDAEvent Interop, EGLImage-CUDA Interop ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 
](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,17 +23,17 @@ armv7l ## CUDA APIs involved ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html) -cuGraphicsEGLRegisterImage, cuStreamCreate, cuEventCreate, cuCtxCreate, cuGraphicsSubResourceGetMappedArray, cuGraphicsUnregisterResource, cuCtxSynchronize, cuEventCreateFromEGLSync, cuEventDestroy, cuStreamWaitEvent, cuCtxPushCurrent, cuSurfObjectCreate, cuInit, cuEventRecord, cuDeviceGetAttribute +cuEventRecord, cuDeviceGetAttribute, cuEventCreate, cuCtxSynchronize, cuEventDestroy, cuGraphicsEGLRegisterImage, cuGraphicsSubResourceGetMappedArray, cuStreamCreate, cuStreamWaitEvent, cuGraphicsUnregisterResource, cuCtxCreate, cuSurfObjectCreate, cuEventCreateFromEGLSync, cuCtxPushCurrent, cuInit ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaDeviceSynchronize, cudaGetValueMismatch, cudaMalloc, cudaMemcpy, cudaGetErrorString +cudaMemcpy, cudaGetErrorString, cudaFree, cudaDeviceSynchronize, cudaGetValueMismatch, cudaMalloc ## Dependencies needed to build/run [EGL](../../../README.md#egl), [EGLSync](../../../README.md#eglsync), [X11](../../../README.md#x11), [GLES](../../../README.md#gles) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/findegl.mk b/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/findegl.mk index cfeee899..33ec1a96 100644 --- a/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/findegl.mk +++ b/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/findegl.mk @@ -58,6 +58,7 @@ ifeq ("$(TARGET_OS)","linux") RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -84,22 +85,12 @@ ifeq ("$(TARGET_OS)","linux") DFLT_PATH ?= /usr/lib endif endif - ifeq ("$(SUSE)","0") + + ifeq ("$(SUSE)","0") GLPATH ?= /usr/X11R6/lib64 GLLINK ?= -L/usr/X11R6/lib64 DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(FEDORA)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(RHEL)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(CENTOS)","0") + else GLPATH ?= /usr/lib64/nvidia GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 diff --git a/Samples/2_Concepts_and_Techniques/FunctionPointers/FunctionPointers_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/FunctionPointers/FunctionPointers_vs2017.vcxproj index 93f0c4b6..e80efd5b 100644 --- a/Samples/2_Concepts_and_Techniques/FunctionPointers/FunctionPointers_vs2017.vcxproj +++ b/Samples/2_Concepts_and_Techniques/FunctionPointers/FunctionPointers_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/FunctionPointers.exe - 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -118,6 +118,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/FunctionPointers/FunctionPointers_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/FunctionPointers/FunctionPointers_vs2019.vcxproj index a67cbbcd..d5cbccdc 100644 --- a/Samples/2_Concepts_and_Techniques/FunctionPointers/FunctionPointers_vs2019.vcxproj +++ b/Samples/2_Concepts_and_Techniques/FunctionPointers/FunctionPointers_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/FunctionPointers.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -114,6 +114,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/FunctionPointers/FunctionPointers_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/FunctionPointers/FunctionPointers_vs2022.vcxproj index 45617a67..ca5eb33b 100644 --- a/Samples/2_Concepts_and_Techniques/FunctionPointers/FunctionPointers_vs2022.vcxproj +++ b/Samples/2_Concepts_and_Techniques/FunctionPointers/FunctionPointers_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/FunctionPointers.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -114,6 +114,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/FunctionPointers/Makefile b/Samples/2_Concepts_and_Techniques/FunctionPointers/Makefile index 47c204fe..651a4f81 100644 --- a/Samples/2_Concepts_and_Techniques/FunctionPointers/Makefile +++ b/Samples/2_Concepts_and_Techniques/FunctionPointers/Makefile @@ -299,9 +299,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/2_Concepts_and_Techniques/FunctionPointers/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/FunctionPointers/NsightEclipse.xml index ecd90f9d..f90f7b34 100644 --- a/Samples/2_Concepts_and_Techniques/FunctionPointers/NsightEclipse.xml +++ b/Samples/2_Concepts_and_Techniques/FunctionPointers/NsightEclipse.xml @@ -3,21 +3,21 @@ FunctionPointers - cudaMemcpyToSymbol - cudaFree - cudaGraphicsMapResources - cudaFreeArray - cudaGraphicsGLRegisterBuffer - cudaGraphicsResourceGetMappedPointer - cudaDeviceSynchronize - cudaDestroyTextureObject - cudaCreateTextureObject - cudaMalloc - cudaMallocArray - cudaGraphicsUnregisterResource cudaGraphicsUnmapResources cudaMemcpy + cudaMallocArray + cudaFreeArray + cudaFree cudaMemcpyFromSymbol + cudaGraphicsResourceGetMappedPointer + cudaGraphicsMapResources + cudaDestroyTextureObject + cudaDeviceSynchronize + cudaCreateTextureObject + cudaMemcpyToSymbol + cudaGraphicsUnregisterResource + cudaMalloc + cudaGraphicsGLRegisterBuffer whole @@ -88,6 +88,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/2_Concepts_and_Techniques/FunctionPointers/README.md 
b/Samples/2_Concepts_and_Techniques/FunctionPointers/README.md index a95ba59d..83e16413 100644 --- a/Samples/2_Concepts_and_Techniques/FunctionPointers/README.md +++ b/Samples/2_Concepts_and_Techniques/FunctionPointers/README.md @@ -10,7 +10,7 @@ Graphics Interop, Image Processing ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemcpyToSymbol, cudaFree, cudaGraphicsMapResources, cudaFreeArray, cudaGraphicsGLRegisterBuffer, cudaGraphicsResourceGetMappedPointer, cudaDeviceSynchronize, 
cudaDestroyTextureObject, cudaCreateTextureObject, cudaMalloc, cudaMallocArray, cudaGraphicsUnregisterResource, cudaGraphicsUnmapResources, cudaMemcpy, cudaMemcpyFromSymbol +cudaGraphicsUnmapResources, cudaMemcpy, cudaMallocArray, cudaFreeArray, cudaFree, cudaMemcpyFromSymbol, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDestroyTextureObject, cudaDeviceSynchronize, cudaCreateTextureObject, cudaMemcpyToSymbol, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer ## Dependencies needed to build/run [X11](../../../README.md#x11), [GL](../../../README.md#gl) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/FunctionPointers/findgllib.mk b/Samples/2_Concepts_and_Techniques/FunctionPointers/findgllib.mk index f0a5c551..998fcf0f 100644 --- a/Samples/2_Concepts_and_Techniques/FunctionPointers/findgllib.mk +++ b/Samples/2_Concepts_and_Techniques/FunctionPointers/findgllib.mk @@ -53,11 +53,12 @@ endif ifeq ("$(TARGET_OS)","linux") # $(info) >> findgllib.mk -> LINUX path <<<) # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside - UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) - FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) - RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) - CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) + UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) + FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) 
+ RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) + CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux") DFLT_PATH ?= /usr/lib endif endif + ifeq ("$(SUSE)","0") GLPATH ?= /usr/X11R6/lib64 GLLINK ?= -L/usr/X11R6/lib64 DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(FEDORA)","0") + else GLPATH ?= /usr/lib64/nvidia GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif - ifeq ("$(RHEL)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(CENTOS)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - + # find libGL, libGLU GLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so -print 2>/dev/null) GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null) diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/MC_EstimatePiInlineP_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/MC_EstimatePiInlineP_vs2017.vcxproj index 7826468d..9b6616f6 100644 --- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/MC_EstimatePiInlineP_vs2017.vcxproj +++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/MC_EstimatePiInlineP_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/MC_EstimatePiInlineP.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -111,6 +111,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/MC_EstimatePiInlineP_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/MC_EstimatePiInlineP_vs2019.vcxproj index accf4529..fd17c3e2 100644 --- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/MC_EstimatePiInlineP_vs2019.vcxproj +++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/MC_EstimatePiInlineP_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/MC_EstimatePiInlineP.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/MC_EstimatePiInlineP_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/MC_EstimatePiInlineP_vs2022.vcxproj index 6ba15531..9d5110d5 100644 --- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/MC_EstimatePiInlineP_vs2022.vcxproj +++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/MC_EstimatePiInlineP_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/MC_EstimatePiInlineP.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/Makefile b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/Makefile index 6bc2ee93..c4a3fa53 100644 --- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/Makefile +++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/NsightEclipse.xml index 5c63395b..bf9c24d2 100644 --- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/NsightEclipse.xml +++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/NsightEclipse.xml @@ -3,13 +3,13 @@ MC_EstimatePiInlineP - cudaFree - cudaFuncGetAttributes - cudaGetDeviceCount - cudaMalloc - cudaSetDevice cudaMemcpy cudaGetErrorString + cudaFree + cudaSetDevice + cudaGetDeviceCount + cudaMalloc + cudaFuncGetAttributes cudaGetDeviceProperties @@ -60,6 +60,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/README.md b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/README.md index 53b09b57..a7d40678 100644 --- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/README.md +++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/README.md @@ -10,7 +10,7 @@ Random Number Generator, Computational Finance, CURAND Library ## Supported SM Architectures -[SM 3.5 
](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaFuncGetAttributes, cudaGetDeviceCount, cudaMalloc, cudaSetDevice, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties +cudaMemcpy, cudaGetErrorString, cudaFree, cudaSetDevice, cudaGetDeviceCount, cudaMalloc, cudaFuncGetAttributes, cudaGetDeviceProperties ## Dependencies needed to build/run [CURAND](../../../README.md#curand) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/MC_EstimatePiInlineQ_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/MC_EstimatePiInlineQ_vs2017.vcxproj index d5ad649b..9ac21844 100644 --- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/MC_EstimatePiInlineQ_vs2017.vcxproj +++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/MC_EstimatePiInlineQ_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/MC_EstimatePiInlineQ.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -111,6 +111,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/MC_EstimatePiInlineQ_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/MC_EstimatePiInlineQ_vs2019.vcxproj index bdf88dd5..b8246a1a 100644 --- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/MC_EstimatePiInlineQ_vs2019.vcxproj +++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/MC_EstimatePiInlineQ_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/MC_EstimatePiInlineQ.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/MC_EstimatePiInlineQ_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/MC_EstimatePiInlineQ_vs2022.vcxproj index 4a8efc23..da748e1f 100644 --- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/MC_EstimatePiInlineQ_vs2022.vcxproj +++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/MC_EstimatePiInlineQ_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/MC_EstimatePiInlineQ.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/Makefile b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/Makefile index 20a15720..5d8b086b 100644 --- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/Makefile +++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/NsightEclipse.xml index 
ed2f8f7a..f087e82b 100644 --- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/NsightEclipse.xml +++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/NsightEclipse.xml @@ -3,13 +3,13 @@ MC_EstimatePiInlineQ - cudaFree - cudaFuncGetAttributes - cudaGetDeviceCount - cudaMalloc - cudaSetDevice cudaMemcpy cudaGetErrorString + cudaFree + cudaSetDevice + cudaGetDeviceCount + cudaMalloc + cudaFuncGetAttributes cudaGetDeviceProperties @@ -60,6 +60,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/README.md b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/README.md index 96e19b72..485c16aa 100644 --- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/README.md +++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/README.md @@ -10,7 +10,7 @@ Random Number Generator, Computational Finance, CURAND Library ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 
](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaFuncGetAttributes, cudaGetDeviceCount, cudaMalloc, cudaSetDevice, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties +cudaMemcpy, cudaGetErrorString, cudaFree, cudaSetDevice, cudaGetDeviceCount, cudaMalloc, cudaFuncGetAttributes, cudaGetDeviceProperties ## Dependencies needed to build/run [CURAND](../../../README.md#curand) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/MC_EstimatePiP_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/MC_EstimatePiP_vs2017.vcxproj index a628995c..c6418871 100644 --- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/MC_EstimatePiP_vs2017.vcxproj +++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/MC_EstimatePiP_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/MC_EstimatePiP.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -111,6 +111,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/MC_EstimatePiP_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/MC_EstimatePiP_vs2019.vcxproj index fb20f5f1..c188ae7d 100644 --- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/MC_EstimatePiP_vs2019.vcxproj +++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/MC_EstimatePiP_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/MC_EstimatePiP.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/MC_EstimatePiP_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/MC_EstimatePiP_vs2022.vcxproj index ec7b8f28..c9893f39 
100644 --- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/MC_EstimatePiP_vs2022.vcxproj +++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/MC_EstimatePiP_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/MC_EstimatePiP.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/Makefile b/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/Makefile index fba6735f..0e5b4237 100644 --- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/Makefile +++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/NsightEclipse.xml index 6578c3f4..56a07dea 100644 --- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/NsightEclipse.xml +++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/NsightEclipse.xml @@ -3,13 +3,13 @@ MC_EstimatePiP - cudaFree - cudaFuncGetAttributes - cudaGetDeviceCount - cudaMalloc - cudaSetDevice cudaMemcpy cudaGetErrorString + cudaFree + cudaSetDevice + cudaGetDeviceCount + cudaMalloc + cudaFuncGetAttributes cudaGetDeviceProperties @@ -60,6 +60,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/README.md 
b/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/README.md index 098e77f8..4390385f 100644 --- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/README.md +++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/README.md @@ -10,7 +10,7 @@ Random Number Generator, Computational Finance, CURAND Library ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaFuncGetAttributes, cudaGetDeviceCount, cudaMalloc, cudaSetDevice, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties +cudaMemcpy, 
cudaGetErrorString, cudaFree, cudaSetDevice, cudaGetDeviceCount, cudaMalloc, cudaFuncGetAttributes, cudaGetDeviceProperties ## Dependencies needed to build/run [CURAND](../../../README.md#curand) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/MC_EstimatePiQ_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/MC_EstimatePiQ_vs2017.vcxproj index 83aba18a..95b72fe5 100644 --- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/MC_EstimatePiQ_vs2017.vcxproj +++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/MC_EstimatePiQ_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/MC_EstimatePiQ.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -111,6 +111,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/MC_EstimatePiQ_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/MC_EstimatePiQ_vs2019.vcxproj index ae4a6711..ff931633 100644 --- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/MC_EstimatePiQ_vs2019.vcxproj +++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/MC_EstimatePiQ_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/MC_EstimatePiQ.exe - 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/MC_EstimatePiQ_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/MC_EstimatePiQ_vs2022.vcxproj index 9e11377d..5714f941 100644 --- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/MC_EstimatePiQ_vs2022.vcxproj +++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/MC_EstimatePiQ_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/MC_EstimatePiQ.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/Makefile b/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/Makefile index 98d071f9..61ae97d2 100644 --- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/Makefile +++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/NsightEclipse.xml 
b/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/NsightEclipse.xml index 45497eea..71f9c101 100644 --- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/NsightEclipse.xml +++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/NsightEclipse.xml @@ -3,13 +3,13 @@ MC_EstimatePiQ - cudaFree - cudaFuncGetAttributes - cudaGetDeviceCount - cudaMalloc - cudaSetDevice cudaMemcpy cudaGetErrorString + cudaFree + cudaSetDevice + cudaGetDeviceCount + cudaMalloc + cudaFuncGetAttributes cudaGetDeviceProperties @@ -60,6 +60,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/README.md b/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/README.md index dd73101e..c6bac7b7 100644 --- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/README.md +++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/README.md @@ -10,7 +10,7 @@ Random Number Generator, Computational Finance, CURAND Library ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 
](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaFuncGetAttributes, cudaGetDeviceCount, cudaMalloc, cudaSetDevice, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties +cudaMemcpy, cudaGetErrorString, cudaFree, cudaSetDevice, cudaGetDeviceCount, cudaMalloc, cudaFuncGetAttributes, cudaGetDeviceProperties ## Dependencies needed to build/run [CURAND](../../../README.md#curand) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/MC_SingleAsianOptionP_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/MC_SingleAsianOptionP_vs2017.vcxproj index df94f0e1..c9d46e45 100644 --- a/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/MC_SingleAsianOptionP_vs2017.vcxproj +++ b/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/MC_SingleAsianOptionP_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/MC_SingleAsianOptionP.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -112,6 +112,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/MC_SingleAsianOptionP_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/MC_SingleAsianOptionP_vs2019.vcxproj index c6830be4..1a31f284 100644 --- a/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/MC_SingleAsianOptionP_vs2019.vcxproj +++ b/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/MC_SingleAsianOptionP_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/MC_SingleAsianOptionP.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -108,6 +108,6 @@ - + diff --git 
a/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/MC_SingleAsianOptionP_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/MC_SingleAsianOptionP_vs2022.vcxproj index ab727ab0..a97d1d42 100644 --- a/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/MC_SingleAsianOptionP_vs2022.vcxproj +++ b/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/MC_SingleAsianOptionP_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/MC_SingleAsianOptionP.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -108,6 +108,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/Makefile b/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/Makefile index c2e3b080..97baec61 100644 --- a/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/Makefile +++ b/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/NsightEclipse.xml index 52a3844d..e11b104e 100644 --- a/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/NsightEclipse.xml +++ b/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/NsightEclipse.xml @@ -3,13 +3,13 @@ MC_SingleAsianOptionP - cudaFree - cudaFuncGetAttributes - 
cudaGetDeviceCount - cudaMalloc - cudaSetDevice cudaMemcpy cudaGetErrorString + cudaFree + cudaSetDevice + cudaGetDeviceCount + cudaMalloc + cudaFuncGetAttributes cudaGetDeviceProperties @@ -61,6 +61,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/README.md b/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/README.md index f2a986d5..f6f6cd1a 100644 --- a/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/README.md +++ b/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/README.md @@ -10,7 +10,7 @@ Random Number Generator, Computational Finance, CURAND Library ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 
](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaFuncGetAttributes, cudaGetDeviceCount, cudaMalloc, cudaSetDevice, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties +cudaMemcpy, cudaGetErrorString, cudaFree, cudaSetDevice, cudaGetDeviceCount, cudaMalloc, cudaFuncGetAttributes, cudaGetDeviceProperties ## Dependencies needed to build/run [CURAND](../../../README.md#curand) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/README.md b/Samples/2_Concepts_and_Techniques/README.md index 00265194..b407d80b 100644 --- a/Samples/2_Concepts_and_Techniques/README.md +++ b/Samples/2_Concepts_and_Techniques/README.md @@ -19,7 +19,7 @@ This sample demonstrates how Discrete Cosine Transform (DCT) for blocks of 8 by ### [EGLStream_CUDA_CrossGPU](./EGLStream_CUDA_CrossGPU) Demonstrates CUDA and EGL Streams interop, where consumer's EGL Stream is on one GPU and producer's on other and both consumer-producer are different processes. -### [EGLStreams_CUDA_Interop](./EGLStreams_CUDA_Interop) +### [EGLStream_CUDA_Interop](./EGLStream_CUDA_Interop) Demonstrates data exchange between CUDA and EGL Streams. 
### [EGLSync_CUDAEvent_Interop](./EGLSync_CUDAEvent_Interop) diff --git a/Samples/2_Concepts_and_Techniques/boxFilter/Makefile b/Samples/2_Concepts_and_Techniques/boxFilter/Makefile index baec273e..ef50006a 100644 --- a/Samples/2_Concepts_and_Techniques/boxFilter/Makefile +++ b/Samples/2_Concepts_and_Techniques/boxFilter/Makefile @@ -299,9 +299,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/2_Concepts_and_Techniques/boxFilter/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/boxFilter/NsightEclipse.xml index 60ada986..5dc27790 100644 --- a/Samples/2_Concepts_and_Techniques/boxFilter/NsightEclipse.xml +++ b/Samples/2_Concepts_and_Techniques/boxFilter/NsightEclipse.xml @@ -3,21 +3,21 @@ boxFilter - cudaFree - cudaGraphicsMapResources - cudaFreeArray - cudaGraphicsGLRegisterBuffer - cudaGraphicsResourceGetMappedPointer - cudaDeviceSynchronize - cudaDestroyTextureObject - cudaCreateTextureObject - cudaMalloc - cudaMallocArray - cudaCreateChannelDesc - cudaGraphicsUnregisterResource cudaGraphicsUnmapResources - cudaMemcpy + cudaCreateChannelDesc + cudaMallocArray + cudaFreeArray + cudaFree cudaGetErrorString + cudaMemcpy + cudaGraphicsResourceGetMappedPointer + cudaGraphicsMapResources + cudaDestroyTextureObject + cudaDeviceSynchronize + cudaCreateTextureObject + cudaGraphicsUnregisterResource + cudaMalloc + cudaGraphicsGLRegisterBuffer whole @@ -84,6 +84,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/2_Concepts_and_Techniques/boxFilter/README.md b/Samples/2_Concepts_and_Techniques/boxFilter/README.md index f4d1299d..5be86cee 100644 --- a/Samples/2_Concepts_and_Techniques/boxFilter/README.md +++ b/Samples/2_Concepts_and_Techniques/boxFilter/README.md @@ -10,7 +10,7 @@ Graphics Interop, Image Processing ## Supported SM 
Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaGraphicsMapResources, cudaFreeArray, cudaGraphicsGLRegisterBuffer, cudaGraphicsResourceGetMappedPointer, cudaDeviceSynchronize, cudaDestroyTextureObject, cudaCreateTextureObject, cudaMalloc, cudaMallocArray, cudaCreateChannelDesc, cudaGraphicsUnregisterResource, cudaGraphicsUnmapResources, cudaMemcpy, cudaGetErrorString +cudaGraphicsUnmapResources, cudaCreateChannelDesc, cudaMallocArray, cudaFreeArray, cudaFree, cudaGetErrorString, cudaMemcpy, 
cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDestroyTextureObject, cudaDeviceSynchronize, cudaCreateTextureObject, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer ## Dependencies needed to build/run [X11](../../../README.md#x11), [GL](../../../README.md#gl) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/boxFilter/boxFilter_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/boxFilter/boxFilter_vs2017.vcxproj index f5cef656..e37e5362 100644 --- a/Samples/2_Concepts_and_Techniques/boxFilter/boxFilter_vs2017.vcxproj +++ b/Samples/2_Concepts_and_Techniques/boxFilter/boxFilter_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/boxFilter.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -119,6 +119,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/boxFilter/boxFilter_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/boxFilter/boxFilter_vs2019.vcxproj index 5e2b348e..08573611 100644 --- a/Samples/2_Concepts_and_Techniques/boxFilter/boxFilter_vs2019.vcxproj +++ b/Samples/2_Concepts_and_Techniques/boxFilter/boxFilter_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/boxFilter.exe - 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -115,6 +115,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/boxFilter/boxFilter_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/boxFilter/boxFilter_vs2022.vcxproj index 635ab0f1..113b623b 100644 --- a/Samples/2_Concepts_and_Techniques/boxFilter/boxFilter_vs2022.vcxproj +++ b/Samples/2_Concepts_and_Techniques/boxFilter/boxFilter_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/boxFilter.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -115,6 +115,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/boxFilter/findgllib.mk b/Samples/2_Concepts_and_Techniques/boxFilter/findgllib.mk index f0a5c551..998fcf0f 100644 --- a/Samples/2_Concepts_and_Techniques/boxFilter/findgllib.mk +++ b/Samples/2_Concepts_and_Techniques/boxFilter/findgllib.mk @@ -53,11 +53,12 @@ endif ifeq ("$(TARGET_OS)","linux") # $(info) >> findgllib.mk -> LINUX path <<<) # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside - UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) - FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) 
- RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) - CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) + UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) + FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) + RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) + CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux") DFLT_PATH ?= /usr/lib endif endif + ifeq ("$(SUSE)","0") GLPATH ?= /usr/X11R6/lib64 GLLINK ?= -L/usr/X11R6/lib64 DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(FEDORA)","0") + else GLPATH ?= /usr/lib64/nvidia GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif - ifeq ("$(RHEL)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(CENTOS)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - + # find libGL, libGLU GLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so -print 2>/dev/null) GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null) diff --git a/Samples/2_Concepts_and_Techniques/convolutionSeparable/Makefile b/Samples/2_Concepts_and_Techniques/convolutionSeparable/Makefile index c189aba9..dd13e54b 100644 --- a/Samples/2_Concepts_and_Techniques/convolutionSeparable/Makefile +++ b/Samples/2_Concepts_and_Techniques/convolutionSeparable/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else 
-SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/2_Concepts_and_Techniques/convolutionSeparable/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/convolutionSeparable/NsightEclipse.xml index a76eb569..6471a445 100644 --- a/Samples/2_Concepts_and_Techniques/convolutionSeparable/NsightEclipse.xml +++ b/Samples/2_Concepts_and_Techniques/convolutionSeparable/NsightEclipse.xml @@ -3,11 +3,11 @@ convolutionSeparable - cudaMemcpyToSymbol + cudaMemcpy cudaFree cudaDeviceSynchronize + cudaMemcpyToSymbol cudaMalloc - cudaMemcpy whole @@ -53,6 +53,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/2_Concepts_and_Techniques/convolutionSeparable/README.md b/Samples/2_Concepts_and_Techniques/convolutionSeparable/README.md index defbd7ea..8afcf177 100644 --- a/Samples/2_Concepts_and_Techniques/convolutionSeparable/README.md +++ b/Samples/2_Concepts_and_Techniques/convolutionSeparable/README.md @@ -10,7 +10,7 @@ Image Processing, Data Parallel Algorithms ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 
](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemcpyToSymbol, cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaMemcpy +cudaMemcpy, cudaFree, cudaDeviceSynchronize, cudaMemcpyToSymbol, cudaMalloc ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/convolutionSeparable/convolutionSeparable_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/convolutionSeparable/convolutionSeparable_vs2017.vcxproj index 4cbaabb2..12140d61 100644 --- a/Samples/2_Concepts_and_Techniques/convolutionSeparable/convolutionSeparable_vs2017.vcxproj +++ b/Samples/2_Concepts_and_Techniques/convolutionSeparable/convolutionSeparable_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/convolutionSeparable.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -109,6 +109,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/convolutionSeparable/convolutionSeparable_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/convolutionSeparable/convolutionSeparable_vs2019.vcxproj index c231867e..882b180c 100644 --- a/Samples/2_Concepts_and_Techniques/convolutionSeparable/convolutionSeparable_vs2019.vcxproj +++ b/Samples/2_Concepts_and_Techniques/convolutionSeparable/convolutionSeparable_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/convolutionSeparable.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -105,6 +105,6 @@ - + diff --git 
a/Samples/2_Concepts_and_Techniques/convolutionSeparable/convolutionSeparable_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/convolutionSeparable/convolutionSeparable_vs2022.vcxproj index e1ae239a..0bfe92ac 100644 --- a/Samples/2_Concepts_and_Techniques/convolutionSeparable/convolutionSeparable_vs2022.vcxproj +++ b/Samples/2_Concepts_and_Techniques/convolutionSeparable/convolutionSeparable_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/convolutionSeparable.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -105,6 +105,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/convolutionTexture/Makefile b/Samples/2_Concepts_and_Techniques/convolutionTexture/Makefile index 5eaca29f..e0631211 100644 --- a/Samples/2_Concepts_and_Techniques/convolutionTexture/Makefile +++ b/Samples/2_Concepts_and_Techniques/convolutionTexture/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/2_Concepts_and_Techniques/convolutionTexture/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/convolutionTexture/NsightEclipse.xml index 622b0ebc..c2a9e145 100644 --- a/Samples/2_Concepts_and_Techniques/convolutionTexture/NsightEclipse.xml +++ b/Samples/2_Concepts_and_Techniques/convolutionTexture/NsightEclipse.xml @@ -3,15 +3,15 @@ convolutionTexture - cudaMemcpyToSymbol + cudaMemcpy + cudaMallocArray + cudaFreeArray cudaFree 
cudaMemcpyToArray - cudaFreeArray cudaDeviceSynchronize cudaCreateTextureObject + cudaMemcpyToSymbol cudaMalloc - cudaMallocArray - cudaMemcpy whole @@ -59,6 +59,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/2_Concepts_and_Techniques/convolutionTexture/README.md b/Samples/2_Concepts_and_Techniques/convolutionTexture/README.md index 11ef179a..b54a396c 100644 --- a/Samples/2_Concepts_and_Techniques/convolutionTexture/README.md +++ b/Samples/2_Concepts_and_Techniques/convolutionTexture/README.md @@ -10,7 +10,7 @@ Image Processing, Texture, Data Parallel Algorithms ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ 
x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemcpyToSymbol, cudaFree, cudaMemcpyToArray, cudaFreeArray, cudaDeviceSynchronize, cudaCreateTextureObject, cudaMalloc, cudaMallocArray, cudaMemcpy +cudaMemcpy, cudaMallocArray, cudaFreeArray, cudaFree, cudaMemcpyToArray, cudaDeviceSynchronize, cudaCreateTextureObject, cudaMemcpyToSymbol, cudaMalloc ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/convolutionTexture/convolutionTexture_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/convolutionTexture/convolutionTexture_vs2017.vcxproj index 442fa53b..1769a595 100644 --- a/Samples/2_Concepts_and_Techniques/convolutionTexture/convolutionTexture_vs2017.vcxproj +++ b/Samples/2_Concepts_and_Techniques/convolutionTexture/convolutionTexture_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/convolutionTexture.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -109,6 +109,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/convolutionTexture/convolutionTexture_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/convolutionTexture/convolutionTexture_vs2019.vcxproj index 936b6fe5..1a367ce1 100644 --- a/Samples/2_Concepts_and_Techniques/convolutionTexture/convolutionTexture_vs2019.vcxproj +++ 
b/Samples/2_Concepts_and_Techniques/convolutionTexture/convolutionTexture_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/convolutionTexture.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -105,6 +105,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/convolutionTexture/convolutionTexture_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/convolutionTexture/convolutionTexture_vs2022.vcxproj index 24511a34..d9ff12c8 100644 --- a/Samples/2_Concepts_and_Techniques/convolutionTexture/convolutionTexture_vs2022.vcxproj +++ b/Samples/2_Concepts_and_Techniques/convolutionTexture/convolutionTexture_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/convolutionTexture.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -105,6 +105,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/cuHook/Makefile b/Samples/2_Concepts_and_Techniques/cuHook/Makefile index 81ea73be..5986c7b7 100644 --- a/Samples/2_Concepts_and_Techniques/cuHook/Makefile +++ b/Samples/2_Concepts_and_Techniques/cuHook/Makefile @@ -329,9 +329,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 
60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/2_Concepts_and_Techniques/cuHook/README.md b/Samples/2_Concepts_and_Techniques/cuHook/README.md index 05833d89..39ec1cbe 100644 --- a/Samples/2_Concepts_and_Techniques/cuHook/README.md +++ b/Samples/2_Concepts_and_Techniques/cuHook/README.md @@ -12,7 +12,7 @@ Debugging ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -25,14 +25,14 @@ x86_64, ppc64le ## CUDA APIs involved ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html) -cuDeviceGetCount, cuCtxCreate, cuMemAlloc, cuHookInfo, cuHookRegisterCallback, cuHook, 
cuMemFree, cuInit, cuCtxDestroy +cuHook, cuMemAlloc, cuHookInfo, cuHookRegisterCallback, cuCtxDestroy, cuMemFree, cuDeviceGetCount, cuCtxCreate, cuInit ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaDeviceReset +cudaDeviceReset, cudaFree ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/dct8x8/Makefile b/Samples/2_Concepts_and_Techniques/dct8x8/Makefile index b00e4d35..0540bc00 100644 --- a/Samples/2_Concepts_and_Techniques/dct8x8/Makefile +++ b/Samples/2_Concepts_and_Techniques/dct8x8/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/2_Concepts_and_Techniques/dct8x8/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/dct8x8/NsightEclipse.xml index 3bbe52f9..adb06756 100644 --- a/Samples/2_Concepts_and_Techniques/dct8x8/NsightEclipse.xml +++ b/Samples/2_Concepts_and_Techniques/dct8x8/NsightEclipse.xml @@ -3,13 +3,13 @@ dct8x8 - cudaFree - cudaFreeArray - cudaDeviceSynchronize - cudaDestroyTextureObject - cudaMallocPitch - cudaCreateTextureObject cudaMallocArray + cudaFreeArray + cudaFree + cudaMallocPitch + cudaDestroyTextureObject + cudaDeviceSynchronize + cudaCreateTextureObject whole @@ -57,6 +57,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/2_Concepts_and_Techniques/dct8x8/README.md b/Samples/2_Concepts_and_Techniques/dct8x8/README.md index 337b2ba2..7e0e24f3 100644 --- a/Samples/2_Concepts_and_Techniques/dct8x8/README.md +++ 
b/Samples/2_Concepts_and_Techniques/dct8x8/README.md @@ -10,7 +10,7 @@ Image Processing, Video Compression ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaFreeArray, cudaDeviceSynchronize, cudaDestroyTextureObject, cudaMallocPitch, cudaCreateTextureObject, cudaMallocArray +cudaMallocArray, cudaFreeArray, cudaFree, cudaMallocPitch, cudaDestroyTextureObject, cudaDeviceSynchronize, cudaCreateTextureObject ## Prerequisites -Download and install the [CUDA Toolkit 
11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/dct8x8/dct8x8_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/dct8x8/dct8x8_vs2017.vcxproj index 11ef09af..5044890e 100644 --- a/Samples/2_Concepts_and_Techniques/dct8x8/dct8x8_vs2017.vcxproj +++ b/Samples/2_Concepts_and_Techniques/dct8x8/dct8x8_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/dct8x8.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -115,6 +115,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/dct8x8/dct8x8_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/dct8x8/dct8x8_vs2019.vcxproj index 7eace209..8452c9db 100644 --- a/Samples/2_Concepts_and_Techniques/dct8x8/dct8x8_vs2019.vcxproj +++ b/Samples/2_Concepts_and_Techniques/dct8x8/dct8x8_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/dct8x8.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -111,6 +111,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/dct8x8/dct8x8_vs2022.vcxproj 
b/Samples/2_Concepts_and_Techniques/dct8x8/dct8x8_vs2022.vcxproj index f7110841..b0fd7378 100644 --- a/Samples/2_Concepts_and_Techniques/dct8x8/dct8x8_vs2022.vcxproj +++ b/Samples/2_Concepts_and_Techniques/dct8x8/dct8x8_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/dct8x8.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -111,6 +111,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/eigenvalues/Makefile b/Samples/2_Concepts_and_Techniques/eigenvalues/Makefile index 93e54441..54c371ea 100644 --- a/Samples/2_Concepts_and_Techniques/eigenvalues/Makefile +++ b/Samples/2_Concepts_and_Techniques/eigenvalues/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/2_Concepts_and_Techniques/eigenvalues/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/eigenvalues/NsightEclipse.xml index 0fce1129..74a13515 100644 --- a/Samples/2_Concepts_and_Techniques/eigenvalues/NsightEclipse.xml +++ b/Samples/2_Concepts_and_Techniques/eigenvalues/NsightEclipse.xml @@ -4,9 +4,9 @@ eigenvalues cudaMalloc - cudaFree cudaDeviceSynchronize cudaMemcpy + cudaFree whole @@ -52,6 +52,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/2_Concepts_and_Techniques/eigenvalues/README.md b/Samples/2_Concepts_and_Techniques/eigenvalues/README.md index 7d217766..bef2e951 100644 --- a/Samples/2_Concepts_and_Techniques/eigenvalues/README.md +++ 
b/Samples/2_Concepts_and_Techniques/eigenvalues/README.md @@ -10,7 +10,7 @@ Linear Algebra ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMalloc, cudaFree, cudaDeviceSynchronize, cudaMemcpy +cudaMalloc, cudaDeviceSynchronize, cudaMemcpy, cudaFree ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/eigenvalues/eigenvalues_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/eigenvalues/eigenvalues_vs2017.vcxproj index 3f2ba5ed..e5a31279 100644 --- a/Samples/2_Concepts_and_Techniques/eigenvalues/eigenvalues_vs2017.vcxproj +++ b/Samples/2_Concepts_and_Techniques/eigenvalues/eigenvalues_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/eigenvalues.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -122,6 +122,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/eigenvalues/eigenvalues_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/eigenvalues/eigenvalues_vs2019.vcxproj index 4f6a8152..8f846835 100644 --- a/Samples/2_Concepts_and_Techniques/eigenvalues/eigenvalues_vs2019.vcxproj +++ b/Samples/2_Concepts_and_Techniques/eigenvalues/eigenvalues_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/eigenvalues.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -118,6 +118,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/eigenvalues/eigenvalues_vs2022.vcxproj 
b/Samples/2_Concepts_and_Techniques/eigenvalues/eigenvalues_vs2022.vcxproj index 0fd7a89a..32faad5e 100644 --- a/Samples/2_Concepts_and_Techniques/eigenvalues/eigenvalues_vs2022.vcxproj +++ b/Samples/2_Concepts_and_Techniques/eigenvalues/eigenvalues_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/eigenvalues.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -118,6 +118,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/histogram/Makefile b/Samples/2_Concepts_and_Techniques/histogram/Makefile index d35c575c..c73f8a98 100644 --- a/Samples/2_Concepts_and_Techniques/histogram/Makefile +++ b/Samples/2_Concepts_and_Techniques/histogram/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/2_Concepts_and_Techniques/histogram/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/histogram/NsightEclipse.xml index 6147ce83..72e7c05b 100644 --- a/Samples/2_Concepts_and_Techniques/histogram/NsightEclipse.xml +++ b/Samples/2_Concepts_and_Techniques/histogram/NsightEclipse.xml @@ -3,10 +3,10 @@ histogram + cudaMemcpy cudaFree cudaDeviceSynchronize cudaMalloc - cudaMemcpy cudaGetDeviceProperties @@ -52,6 +52,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/2_Concepts_and_Techniques/histogram/README.md b/Samples/2_Concepts_and_Techniques/histogram/README.md index 4ec0ce47..8ddf8e58 100644 --- 
a/Samples/2_Concepts_and_Techniques/histogram/README.md +++ b/Samples/2_Concepts_and_Techniques/histogram/README.md @@ -10,7 +10,7 @@ Image Processing, Data Parallel Algorithms ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaMemcpy, cudaGetDeviceProperties +cudaMemcpy, cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaGetDeviceProperties ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for 
your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/histogram/histogram_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/histogram/histogram_vs2017.vcxproj index f4cff9b5..39dd8378 100644 --- a/Samples/2_Concepts_and_Techniques/histogram/histogram_vs2017.vcxproj +++ b/Samples/2_Concepts_and_Techniques/histogram/histogram_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/histogram.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -110,6 +110,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/histogram/histogram_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/histogram/histogram_vs2019.vcxproj index d2f26cb2..5bde10fc 100644 --- a/Samples/2_Concepts_and_Techniques/histogram/histogram_vs2019.vcxproj +++ b/Samples/2_Concepts_and_Techniques/histogram/histogram_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/histogram.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -106,6 +106,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/histogram/histogram_vs2022.vcxproj 
b/Samples/2_Concepts_and_Techniques/histogram/histogram_vs2022.vcxproj index 22496c8f..a3e05c1a 100644 --- a/Samples/2_Concepts_and_Techniques/histogram/histogram_vs2022.vcxproj +++ b/Samples/2_Concepts_and_Techniques/histogram/histogram_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/histogram.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -106,6 +106,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/imageDenoising/Makefile b/Samples/2_Concepts_and_Techniques/imageDenoising/Makefile index d452de66..37895080 100644 --- a/Samples/2_Concepts_and_Techniques/imageDenoising/Makefile +++ b/Samples/2_Concepts_and_Techniques/imageDenoising/Makefile @@ -299,9 +299,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/2_Concepts_and_Techniques/imageDenoising/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/imageDenoising/NsightEclipse.xml index 0fbb8631..bef88766 100644 --- a/Samples/2_Concepts_and_Techniques/imageDenoising/NsightEclipse.xml +++ b/Samples/2_Concepts_and_Techniques/imageDenoising/NsightEclipse.xml @@ -3,19 +3,19 @@ imageDenoising - cudaFree - cudaGraphicsMapResources - cudaGraphicsUnregisterResource - cudaFreeArray - cudaGraphicsGLRegisterBuffer - cudaGraphicsResourceGetMappedPointer - cudaDeviceSynchronize - cudaCreateTextureObject - cudaMalloc - cudaMallocArray - cudaGLRegisterBufferObject cudaGraphicsUnmapResources 
cudaMemcpy + cudaMallocArray + cudaFreeArray + cudaFree + cudaGraphicsResourceGetMappedPointer + cudaGraphicsMapResources + cudaDeviceSynchronize + cudaCreateTextureObject + cudaGLRegisterBufferObject + cudaGraphicsUnregisterResource + cudaMalloc + cudaGraphicsGLRegisterBuffer whole @@ -83,6 +83,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/2_Concepts_and_Techniques/imageDenoising/README.md b/Samples/2_Concepts_and_Techniques/imageDenoising/README.md index e0ef23db..d5741a77 100644 --- a/Samples/2_Concepts_and_Techniques/imageDenoising/README.md +++ b/Samples/2_Concepts_and_Techniques/imageDenoising/README.md @@ -10,7 +10,7 @@ Image Processing ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 
9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaGraphicsMapResources, cudaGraphicsUnregisterResource, cudaFreeArray, cudaGraphicsGLRegisterBuffer, cudaGraphicsResourceGetMappedPointer, cudaDeviceSynchronize, cudaCreateTextureObject, cudaMalloc, cudaMallocArray, cudaGLRegisterBufferObject, cudaGraphicsUnmapResources, cudaMemcpy +cudaGraphicsUnmapResources, cudaMemcpy, cudaMallocArray, cudaFreeArray, cudaFree, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDeviceSynchronize, cudaCreateTextureObject, cudaGLRegisterBufferObject, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer ## Dependencies needed to build/run [X11](../../../README.md#x11), [GL](../../../README.md#gl) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/imageDenoising/findgllib.mk b/Samples/2_Concepts_and_Techniques/imageDenoising/findgllib.mk index f0a5c551..998fcf0f 100644 --- a/Samples/2_Concepts_and_Techniques/imageDenoising/findgllib.mk +++ b/Samples/2_Concepts_and_Techniques/imageDenoising/findgllib.mk @@ -53,11 +53,12 @@ endif ifeq ("$(TARGET_OS)","linux") # $(info) >> findgllib.mk -> LINUX path <<<) # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside - UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) - FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) - RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) 
- CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) + UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) + FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) + RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) + CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux") DFLT_PATH ?= /usr/lib endif endif + ifeq ("$(SUSE)","0") GLPATH ?= /usr/X11R6/lib64 GLLINK ?= -L/usr/X11R6/lib64 DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(FEDORA)","0") + else GLPATH ?= /usr/lib64/nvidia GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif - ifeq ("$(RHEL)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(CENTOS)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - + # find libGL, libGLU GLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so -print 2>/dev/null) GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null) diff --git a/Samples/2_Concepts_and_Techniques/imageDenoising/imageDenoising_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/imageDenoising/imageDenoising_vs2017.vcxproj index 866d8b33..bf03ff5b 100644 --- a/Samples/2_Concepts_and_Techniques/imageDenoising/imageDenoising_vs2017.vcxproj +++ b/Samples/2_Concepts_and_Techniques/imageDenoising/imageDenoising_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/imageDenoising.exe - 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -123,6 +123,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/imageDenoising/imageDenoising_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/imageDenoising/imageDenoising_vs2019.vcxproj index c924e1a5..facb985e 100644 --- a/Samples/2_Concepts_and_Techniques/imageDenoising/imageDenoising_vs2019.vcxproj +++ b/Samples/2_Concepts_and_Techniques/imageDenoising/imageDenoising_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/imageDenoising.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -119,6 +119,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/imageDenoising/imageDenoising_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/imageDenoising/imageDenoising_vs2022.vcxproj index 4af74bf3..ec0b7c63 100644 --- a/Samples/2_Concepts_and_Techniques/imageDenoising/imageDenoising_vs2022.vcxproj +++ b/Samples/2_Concepts_and_Techniques/imageDenoising/imageDenoising_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/imageDenoising.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -119,6 +119,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/inlinePTX/Makefile b/Samples/2_Concepts_and_Techniques/inlinePTX/Makefile index 481f8a36..6b06a65e 100644 --- a/Samples/2_Concepts_and_Techniques/inlinePTX/Makefile +++ b/Samples/2_Concepts_and_Techniques/inlinePTX/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/2_Concepts_and_Techniques/inlinePTX/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/inlinePTX/NsightEclipse.xml index dd8e554c..b3298102 100644 --- a/Samples/2_Concepts_and_Techniques/inlinePTX/NsightEclipse.xml +++ b/Samples/2_Concepts_and_Techniques/inlinePTX/NsightEclipse.xml @@ -3,15 +3,15 @@ inlinePTX + cudaMemcpy cudaFree cudaMallocHost + cudaGetLastError cudaGridSize - cudaDeviceSynchronize cudaBlockSize + cudaDeviceSynchronize cudaFreeHost cudaMalloc - cudaGetLastError - cudaMemcpy whole @@ -51,6 +51,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/2_Concepts_and_Techniques/inlinePTX/README.md b/Samples/2_Concepts_and_Techniques/inlinePTX/README.md index 61009f1d..60d100d8 100644 --- a/Samples/2_Concepts_and_Techniques/inlinePTX/README.md +++ b/Samples/2_Concepts_and_Techniques/inlinePTX/README.md @@ -10,7 +10,7 @@ Performance Strategies, PTX Assembly, CUDA Driver API ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 
](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaMallocHost, cudaGridSize, cudaDeviceSynchronize, cudaBlockSize, cudaFreeHost, cudaMalloc, cudaGetLastError, cudaMemcpy +cudaMemcpy, cudaFree, cudaMallocHost, cudaGetLastError, cudaGridSize, cudaBlockSize, cudaDeviceSynchronize, cudaFreeHost, cudaMalloc ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/inlinePTX/inlinePTX_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/inlinePTX/inlinePTX_vs2017.vcxproj index ef564769..b54f0eca 100644 --- a/Samples/2_Concepts_and_Techniques/inlinePTX/inlinePTX_vs2017.vcxproj +++ b/Samples/2_Concepts_and_Techniques/inlinePTX/inlinePTX_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/inlinePTX.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/inlinePTX/inlinePTX_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/inlinePTX/inlinePTX_vs2019.vcxproj index c843d4c8..59b6d335 100644 --- a/Samples/2_Concepts_and_Techniques/inlinePTX/inlinePTX_vs2019.vcxproj +++ b/Samples/2_Concepts_and_Techniques/inlinePTX/inlinePTX_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/inlinePTX.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/inlinePTX/inlinePTX_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/inlinePTX/inlinePTX_vs2022.vcxproj index 6e0f9bbd..955dd6ca 100644 --- a/Samples/2_Concepts_and_Techniques/inlinePTX/inlinePTX_vs2022.vcxproj +++ 
b/Samples/2_Concepts_and_Techniques/inlinePTX/inlinePTX_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/inlinePTX.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/README.md b/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/README.md index 8b06db79..3d99e87e 100644 --- a/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/README.md +++ b/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/README.md @@ -10,7 +10,7 @@ Performance Strategies, PTX Assembly, CUDA Driver API, Runtime Compilation ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 
](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,7 +23,7 @@ x86_64, ppc64le, aarch64 ## CUDA APIs involved ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html) -cuMemAlloc, cuLaunchKernel, cuCtxSynchronize, cuMemFree, cuMemcpyDtoH, cuModuleGetFunction +cuMemcpyDtoH, cuLaunchKernel, cuCtxSynchronize, cuMemAlloc, cuMemFree, cuModuleGetFunction ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) cudaBlockSize, cudaGridSize @@ -33,7 +33,7 @@ cudaBlockSize, cudaGridSize ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/inlinePTX_nvrtc_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/inlinePTX_nvrtc_vs2017.vcxproj index 7b167dbe..3d8dcc9f 100644 --- a/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/inlinePTX_nvrtc_vs2017.vcxproj +++ b/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/inlinePTX_nvrtc_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -107,6 +107,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/inlinePTX_nvrtc_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/inlinePTX_nvrtc_vs2019.vcxproj index 4e9539a9..be53ad45 100644 --- a/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/inlinePTX_nvrtc_vs2019.vcxproj +++ b/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/inlinePTX_nvrtc_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -103,6 +103,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/inlinePTX_nvrtc_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/inlinePTX_nvrtc_vs2022.vcxproj index 3eb36cb7..87de14f3 100644 --- a/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/inlinePTX_nvrtc_vs2022.vcxproj +++ b/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/inlinePTX_nvrtc_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -103,6 +103,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/interval/Makefile b/Samples/2_Concepts_and_Techniques/interval/Makefile index bb0e46f9..69cf0f08 100644 --- a/Samples/2_Concepts_and_Techniques/interval/Makefile +++ b/Samples/2_Concepts_and_Techniques/interval/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/2_Concepts_and_Techniques/interval/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/interval/NsightEclipse.xml 
index 5b7f3004..33d957a0 100644 --- a/Samples/2_Concepts_and_Techniques/interval/NsightEclipse.xml +++ b/Samples/2_Concepts_and_Techniques/interval/NsightEclipse.xml @@ -3,19 +3,19 @@ interval - cudaDeviceSetLimit - cudaFree - cudaFuncSetCacheConfig - cudaEventRecord - cudaEventCreate - cudaEventElapsedTime - cudaDeviceSynchronize - cudaSetDevice - cudaMalloc - cudaEventDestroy - cudaGetLastError cudaMemcpy + cudaFuncSetCacheConfig + cudaMalloc + cudaFree + cudaGetLastError + cudaSetDevice + cudaDeviceSynchronize + cudaEventRecord + cudaDeviceSetLimit + cudaEventDestroy + cudaEventElapsedTime cudaGetDeviceProperties + cudaEventCreate whole @@ -55,6 +55,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/2_Concepts_and_Techniques/interval/README.md b/Samples/2_Concepts_and_Techniques/interval/README.md index 2d336b2e..d13b6e9a 100644 --- a/Samples/2_Concepts_and_Techniques/interval/README.md +++ b/Samples/2_Concepts_and_Techniques/interval/README.md @@ -10,7 +10,7 @@ Recursion, Templates ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 
](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaDeviceSetLimit, cudaFree, cudaFuncSetCacheConfig, cudaEventRecord, cudaEventCreate, cudaEventElapsedTime, cudaDeviceSynchronize, cudaSetDevice, cudaMalloc, cudaEventDestroy, cudaGetLastError, cudaMemcpy, cudaGetDeviceProperties +cudaMemcpy, cudaFuncSetCacheConfig, cudaMalloc, cudaFree, cudaGetLastError, cudaSetDevice, cudaDeviceSynchronize, cudaEventRecord, cudaDeviceSetLimit, cudaEventDestroy, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/interval/interval_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/interval/interval_vs2017.vcxproj index 937345f7..2c71346d 100644 --- a/Samples/2_Concepts_and_Techniques/interval/interval_vs2017.vcxproj +++ b/Samples/2_Concepts_and_Techniques/interval/interval_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/interval.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -213,6 +213,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/interval/interval_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/interval/interval_vs2019.vcxproj index 0b54fff8..43bea230 100644 --- a/Samples/2_Concepts_and_Techniques/interval/interval_vs2019.vcxproj +++ b/Samples/2_Concepts_and_Techniques/interval/interval_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/interval.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -209,6 +209,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/interval/interval_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/interval/interval_vs2022.vcxproj index 4f5e0b4c..ff04d5b6 100644 --- a/Samples/2_Concepts_and_Techniques/interval/interval_vs2022.vcxproj +++ 
b/Samples/2_Concepts_and_Techniques/interval/interval_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/interval.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -209,6 +209,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/particles/Makefile b/Samples/2_Concepts_and_Techniques/particles/Makefile index f322ac64..62b19bfb 100644 --- a/Samples/2_Concepts_and_Techniques/particles/Makefile +++ b/Samples/2_Concepts_and_Techniques/particles/Makefile @@ -324,9 +324,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/2_Concepts_and_Techniques/particles/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/particles/NsightEclipse.xml index 0b16f690..b3fcd5ad 100644 --- a/Samples/2_Concepts_and_Techniques/particles/NsightEclipse.xml +++ b/Samples/2_Concepts_and_Techniques/particles/NsightEclipse.xml @@ -6,19 +6,19 @@ --std=c++14 - cudaMemcpyToSymbol - cudaMemset - cudaFree - cudaGraphicsMapResources - cudaGLInit - cudaGraphicsGLRegisterBuffer - cudaGraphicsResourceGetMappedPointer - cudaDeviceSynchronize - cudaInit - cudaMalloc - cudaGraphicsUnregisterResource cudaGraphicsUnmapResources cudaMemcpy + cudaFree + cudaGraphicsResourceGetMappedPointer + cudaGraphicsMapResources + cudaDeviceSynchronize + cudaMemset + cudaMemcpyToSymbol + cudaGraphicsGLRegisterBuffer + cudaGraphicsUnregisterResource + cudaMalloc + cudaInit + cudaGLInit " to the command 
line will allow users to set # of particles for simulation. This example implements a uniform grid data structure using either atomic operations or a fast radix sort from the Thrust library]]> whole @@ -83,6 +83,7 @@ sm80 sm86 sm87 + sm90 ..\..\..\Common\param.h ..\..\..\Common\paramgl.h diff --git a/Samples/2_Concepts_and_Techniques/particles/README.md b/Samples/2_Concepts_and_Techniques/particles/README.md index 0e1b6134..3b1a6974 100644 --- a/Samples/2_Concepts_and_Techniques/particles/README.md +++ b/Samples/2_Concepts_and_Techniques/particles/README.md @@ -10,7 +10,7 @@ Graphics Interop, Data Parallel Algorithms, Physically-Based Simulation, Perform ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 
9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemcpyToSymbol, cudaMemset, cudaFree, cudaGraphicsMapResources, cudaGLInit, cudaGraphicsGLRegisterBuffer, cudaGraphicsResourceGetMappedPointer, cudaDeviceSynchronize, cudaInit, cudaMalloc, cudaGraphicsUnregisterResource, cudaGraphicsUnmapResources, cudaMemcpy +cudaGraphicsUnmapResources, cudaMemcpy, cudaFree, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDeviceSynchronize, cudaMemset, cudaMemcpyToSymbol, cudaGraphicsGLRegisterBuffer, cudaGraphicsUnregisterResource, cudaMalloc, cudaInit, cudaGLInit ## Dependencies needed to build/run [X11](../../../README.md#x11), [GL](../../../README.md#gl) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/particles/findgllib.mk b/Samples/2_Concepts_and_Techniques/particles/findgllib.mk index f0a5c551..998fcf0f 100644 --- a/Samples/2_Concepts_and_Techniques/particles/findgllib.mk +++ b/Samples/2_Concepts_and_Techniques/particles/findgllib.mk @@ -53,11 +53,12 @@ endif ifeq ("$(TARGET_OS)","linux") # $(info) >> findgllib.mk -> LINUX path <<<) # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside - UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) - FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) - RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) - CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) 
+ UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) + FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) + RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) + CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux") DFLT_PATH ?= /usr/lib endif endif + ifeq ("$(SUSE)","0") GLPATH ?= /usr/X11R6/lib64 GLLINK ?= -L/usr/X11R6/lib64 DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(FEDORA)","0") + else GLPATH ?= /usr/lib64/nvidia GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif - ifeq ("$(RHEL)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(CENTOS)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - + # find libGL, libGLU GLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so -print 2>/dev/null) GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null) diff --git a/Samples/2_Concepts_and_Techniques/particles/particles_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/particles/particles_vs2017.vcxproj index 8f28c764..d2c2a6f8 100644 --- a/Samples/2_Concepts_and_Techniques/particles/particles_vs2017.vcxproj +++ b/Samples/2_Concepts_and_Techniques/particles/particles_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/particles.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -129,6 +129,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/particles/particles_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/particles/particles_vs2019.vcxproj index fba7fdcc..c739cc74 100644 --- a/Samples/2_Concepts_and_Techniques/particles/particles_vs2019.vcxproj +++ b/Samples/2_Concepts_and_Techniques/particles/particles_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/particles.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -125,6 +125,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/particles/particles_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/particles/particles_vs2022.vcxproj index a915f4c0..77b78a23 100644 --- a/Samples/2_Concepts_and_Techniques/particles/particles_vs2022.vcxproj +++ b/Samples/2_Concepts_and_Techniques/particles/particles_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/particles.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -125,6 +125,6 @@ - + diff --git 
a/Samples/2_Concepts_and_Techniques/radixSortThrust/Makefile b/Samples/2_Concepts_and_Techniques/radixSortThrust/Makefile index df5ee47f..459e9a21 100644 --- a/Samples/2_Concepts_and_Techniques/radixSortThrust/Makefile +++ b/Samples/2_Concepts_and_Techniques/radixSortThrust/Makefile @@ -304,9 +304,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/2_Concepts_and_Techniques/radixSortThrust/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/radixSortThrust/NsightEclipse.xml index c29e9f17..62fd4d55 100644 --- a/Samples/2_Concepts_and_Techniques/radixSortThrust/NsightEclipse.xml +++ b/Samples/2_Concepts_and_Techniques/radixSortThrust/NsightEclipse.xml @@ -6,13 +6,13 @@ --std=c++14 - cudaEventRecord - cudaEventCreate - cudaEventElapsedTime cudaEventSynchronize - cudaEventDestroy - cudaGetDeviceProperties + cudaEventRecord cudaGetDevice + cudaEventDestroy + cudaEventElapsedTime + cudaGetDeviceProperties + cudaEventCreate whole @@ -58,6 +58,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/2_Concepts_and_Techniques/radixSortThrust/README.md b/Samples/2_Concepts_and_Techniques/radixSortThrust/README.md index 835bfd17..4914b5d2 100644 --- a/Samples/2_Concepts_and_Techniques/radixSortThrust/README.md +++ b/Samples/2_Concepts_and_Techniques/radixSortThrust/README.md @@ -10,7 +10,7 @@ Data-Parallel Algorithms, Performance Strategies ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 
](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaEventRecord, cudaEventCreate, cudaEventElapsedTime, cudaEventSynchronize, cudaEventDestroy, cudaGetDeviceProperties, cudaGetDevice +cudaEventSynchronize, cudaEventRecord, cudaGetDevice, cudaEventDestroy, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/radixSortThrust/radixSortThrust_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/radixSortThrust/radixSortThrust_vs2017.vcxproj index 159d441e..a9c39d2e 100644 --- a/Samples/2_Concepts_and_Techniques/radixSortThrust/radixSortThrust_vs2017.vcxproj +++ b/Samples/2_Concepts_and_Techniques/radixSortThrust/radixSortThrust_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/radixSortThrust.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/radixSortThrust/radixSortThrust_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/radixSortThrust/radixSortThrust_vs2019.vcxproj index 244e1889..9ac46af9 100644 --- a/Samples/2_Concepts_and_Techniques/radixSortThrust/radixSortThrust_vs2019.vcxproj +++ b/Samples/2_Concepts_and_Techniques/radixSortThrust/radixSortThrust_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/radixSortThrust.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/radixSortThrust/radixSortThrust_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/radixSortThrust/radixSortThrust_vs2022.vcxproj index 
edef2e2d..30b2d9bb 100644 --- a/Samples/2_Concepts_and_Techniques/radixSortThrust/radixSortThrust_vs2022.vcxproj +++ b/Samples/2_Concepts_and_Techniques/radixSortThrust/radixSortThrust_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/radixSortThrust.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/reduction/Makefile b/Samples/2_Concepts_and_Techniques/reduction/Makefile index a46a5bb3..eed9f801 100644 --- a/Samples/2_Concepts_and_Techniques/reduction/Makefile +++ b/Samples/2_Concepts_and_Techniques/reduction/Makefile @@ -310,9 +310,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/2_Concepts_and_Techniques/reduction/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/reduction/NsightEclipse.xml index e7fa89ac..629ec3ff 100644 --- a/Samples/2_Concepts_and_Techniques/reduction/NsightEclipse.xml +++ b/Samples/2_Concepts_and_Techniques/reduction/NsightEclipse.xml @@ -6,13 +6,13 @@ --std=c++11 - cudaFree - cudaDeviceSynchronize - cudaMalloc - cudaSetDevice cudaMemcpy - cudaGetDeviceProperties + cudaFree + cudaSetDevice + cudaDeviceSynchronize cudaGetDevice + cudaMalloc + cudaGetDeviceProperties whole @@ -67,6 +67,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/2_Concepts_and_Techniques/reduction/README.md b/Samples/2_Concepts_and_Techniques/reduction/README.md index 
1fde9b55..65024c63 100644 --- a/Samples/2_Concepts_and_Techniques/reduction/README.md +++ b/Samples/2_Concepts_and_Techniques/reduction/README.md @@ -10,7 +10,7 @@ Data-Parallel Algorithms, Performance Strategies ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaSetDevice, cudaMemcpy, cudaGetDeviceProperties, cudaGetDevice +cudaMemcpy, cudaFree, cudaSetDevice, cudaDeviceSynchronize, cudaGetDevice, cudaMalloc, cudaGetDeviceProperties ## Dependencies 
needed to build/run [CPP11](../../../README.md#cpp11) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/reduction/reduction_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/reduction/reduction_vs2017.vcxproj index 65d31a5d..f80b1766 100644 --- a/Samples/2_Concepts_and_Techniques/reduction/reduction_vs2017.vcxproj +++ b/Samples/2_Concepts_and_Techniques/reduction/reduction_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/reduction.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -108,6 +108,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/reduction/reduction_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/reduction/reduction_vs2019.vcxproj index b5b673a3..dea43eab 100644 --- a/Samples/2_Concepts_and_Techniques/reduction/reduction_vs2019.vcxproj +++ b/Samples/2_Concepts_and_Techniques/reduction/reduction_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/reduction.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/reduction/reduction_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/reduction/reduction_vs2022.vcxproj index a3f39519..bbc6826a 100644 --- a/Samples/2_Concepts_and_Techniques/reduction/reduction_vs2022.vcxproj +++ b/Samples/2_Concepts_and_Techniques/reduction/reduction_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/reduction.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/Makefile b/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/Makefile index 32bdaeb7..2f62c73e 100644 --- a/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/Makefile +++ b/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/Makefile @@ -322,9 +322,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 61 70 72 75 80 86 87 +SMS ?= 61 70 72 75 80 86 87 90 else -SMS ?= 60 61 70 75 80 86 +SMS ?= 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/NsightEclipse.xml index bc231be2..09decc91 100644 --- a/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/NsightEclipse.xml +++ 
b/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/NsightEclipse.xml @@ -6,15 +6,15 @@ --std=c++11 - cudaFree - cudaOccupancyMaxActiveBlocksPerMultiprocessor - cudaOccupancyMaxPotentialBlockSize - cudaDeviceSynchronize - cudaSetDevice - cudaMalloc - cudaLaunchCooperativeKernel cudaMemcpy + cudaFree + cudaSetDevice + cudaDeviceSynchronize + cudaLaunchCooperativeKernel + cudaMalloc + cudaOccupancyMaxActiveBlocksPerMultiprocessor cudaGetDeviceProperties + cudaOccupancyMaxPotentialBlockSize @@ -51,6 +51,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/README.md b/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/README.md index b0d5e58f..7473bae9 100644 --- a/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/README.md +++ b/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/README.md @@ -10,7 +10,7 @@ Cooperative Groups, MultiBlock Cooperative Groups ## Supported SM Architectures -[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, 
cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaOccupancyMaxPotentialBlockSize, cudaDeviceSynchronize, cudaSetDevice, cudaMalloc, cudaLaunchCooperativeKernel, cudaMemcpy, cudaGetDeviceProperties +cudaMemcpy, cudaFree, cudaSetDevice, cudaDeviceSynchronize, cudaLaunchCooperativeKernel, cudaMalloc, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaGetDeviceProperties, cudaOccupancyMaxPotentialBlockSize ## Dependencies needed to build/run [MBCG](../../../README.md#mbcg), [CPP11](../../../README.md#cpp11) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/reductionMultiBlockCG_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/reductionMultiBlockCG_vs2017.vcxproj index 4a505b43..c467625a 100644 --- a/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/reductionMultiBlockCG_vs2017.vcxproj +++ b/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/reductionMultiBlockCG_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/reductionMultiBlockCG.exe - compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/reductionMultiBlockCG_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/reductionMultiBlockCG_vs2019.vcxproj index 1e0a9cd1..306b8c6d 100644 --- 
a/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/reductionMultiBlockCG_vs2019.vcxproj +++ b/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/reductionMultiBlockCG_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/reductionMultiBlockCG.exe - compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/reductionMultiBlockCG_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/reductionMultiBlockCG_vs2022.vcxproj index e5b2eb9f..a7261ba7 100644 --- a/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/reductionMultiBlockCG_vs2022.vcxproj +++ b/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/reductionMultiBlockCG_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/reductionMultiBlockCG.exe - compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/scalarProd/Makefile b/Samples/2_Concepts_and_Techniques/scalarProd/Makefile index e20a3810..9cacc538 100644 --- a/Samples/2_Concepts_and_Techniques/scalarProd/Makefile +++ b/Samples/2_Concepts_and_Techniques/scalarProd/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git 
a/Samples/2_Concepts_and_Techniques/scalarProd/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/scalarProd/NsightEclipse.xml index 6d36f764..79a458f3 100644 --- a/Samples/2_Concepts_and_Techniques/scalarProd/NsightEclipse.xml +++ b/Samples/2_Concepts_and_Techniques/scalarProd/NsightEclipse.xml @@ -4,9 +4,9 @@ scalarProd cudaMalloc - cudaFree cudaDeviceSynchronize cudaMemcpy + cudaFree whole @@ -46,6 +46,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/2_Concepts_and_Techniques/scalarProd/README.md b/Samples/2_Concepts_and_Techniques/scalarProd/README.md index 3b54a966..47ff8e57 100644 --- a/Samples/2_Concepts_and_Techniques/scalarProd/README.md +++ b/Samples/2_Concepts_and_Techniques/scalarProd/README.md @@ -10,7 +10,7 @@ Linear Algebra ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 
](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMalloc, cudaFree, cudaDeviceSynchronize, cudaMemcpy +cudaMalloc, cudaDeviceSynchronize, cudaMemcpy, cudaFree ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/scalarProd/scalarProd_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/scalarProd/scalarProd_vs2017.vcxproj index 087bc7df..d404cd61 100644 --- a/Samples/2_Concepts_and_Techniques/scalarProd/scalarProd_vs2017.vcxproj +++ b/Samples/2_Concepts_and_Techniques/scalarProd/scalarProd_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/scalarProd.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -108,6 +108,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/scalarProd/scalarProd_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/scalarProd/scalarProd_vs2019.vcxproj index bfb89fa1..72e9579a 100644 --- a/Samples/2_Concepts_and_Techniques/scalarProd/scalarProd_vs2019.vcxproj +++ b/Samples/2_Concepts_and_Techniques/scalarProd/scalarProd_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/scalarProd.exe - 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/scalarProd/scalarProd_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/scalarProd/scalarProd_vs2022.vcxproj index 0aabdbf4..cb130eaf 100644 --- a/Samples/2_Concepts_and_Techniques/scalarProd/scalarProd_vs2022.vcxproj +++ b/Samples/2_Concepts_and_Techniques/scalarProd/scalarProd_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/scalarProd.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/scan/Makefile b/Samples/2_Concepts_and_Techniques/scan/Makefile index 5d371f2f..8ce4ab79 100644 --- a/Samples/2_Concepts_and_Techniques/scan/Makefile +++ b/Samples/2_Concepts_and_Techniques/scan/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/2_Concepts_and_Techniques/scan/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/scan/NsightEclipse.xml index 239b68b1..11e6c2f3 
100644 --- a/Samples/2_Concepts_and_Techniques/scan/NsightEclipse.xml +++ b/Samples/2_Concepts_and_Techniques/scan/NsightEclipse.xml @@ -4,9 +4,9 @@ scan cudaMalloc - cudaFree cudaDeviceSynchronize cudaMemcpy + cudaFree whole @@ -50,6 +50,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/2_Concepts_and_Techniques/scan/README.md b/Samples/2_Concepts_and_Techniques/scan/README.md index 8a0563f4..e10d0a38 100644 --- a/Samples/2_Concepts_and_Techniques/scan/README.md +++ b/Samples/2_Concepts_and_Techniques/scan/README.md @@ -10,7 +10,7 @@ Data-Parallel Algorithms, Performance Strategies ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ 
-23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMalloc, cudaFree, cudaDeviceSynchronize, cudaMemcpy +cudaMalloc, cudaDeviceSynchronize, cudaMemcpy, cudaFree ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/scan/scan_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/scan/scan_vs2017.vcxproj index 37db8569..ac2bd4bd 100644 --- a/Samples/2_Concepts_and_Techniques/scan/scan_vs2017.vcxproj +++ b/Samples/2_Concepts_and_Techniques/scan/scan_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/scan.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -109,6 +109,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/scan/scan_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/scan/scan_vs2019.vcxproj index b6da5f75..fddb8498 100644 --- a/Samples/2_Concepts_and_Techniques/scan/scan_vs2019.vcxproj +++ b/Samples/2_Concepts_and_Techniques/scan/scan_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/scan.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -105,6 +105,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/scan/scan_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/scan/scan_vs2022.vcxproj index cb3e2bed..4f238933 100644 --- a/Samples/2_Concepts_and_Techniques/scan/scan_vs2022.vcxproj +++ b/Samples/2_Concepts_and_Techniques/scan/scan_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/scan.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -105,6 +105,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/Makefile b/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/Makefile index 9f15915f..fe1a04ef 100644 --- a/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/Makefile +++ b/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/Makefile @@ -304,9 +304,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/NsightEclipse.xml index dd3363ea..bfabb713 100644 --- a/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/NsightEclipse.xml +++ 
b/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/NsightEclipse.xml @@ -7,13 +7,13 @@ --threads 1 - cudaMemset - cudaEventRecord - cudaEventCreate - cudaEventElapsedTime - cudaEventSynchronize - cudaMemGetInfo cudaMemcpy + cudaMemGetInfo + cudaEventSynchronize + cudaEventRecord + cudaMemset + cudaEventElapsedTime + cudaEventCreate whole @@ -60,6 +60,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/README.md b/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/README.md index cd6cbc5a..b6292353 100644 --- a/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/README.md +++ b/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/README.md @@ -10,7 +10,7 @@ Data-Parallel Algorithms, Performance Strategies ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 
](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemset, cudaEventRecord, cudaEventCreate, cudaEventElapsedTime, cudaEventSynchronize, cudaMemGetInfo, cudaMemcpy +cudaMemcpy, cudaMemGetInfo, cudaEventSynchronize, cudaEventRecord, cudaMemset, cudaEventElapsedTime, cudaEventCreate ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/segmentationTreeThrust_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/segmentationTreeThrust_vs2017.vcxproj index 14b777a6..d72fc3b5 100644 --- a/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/segmentationTreeThrust_vs2017.vcxproj +++ b/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/segmentationTreeThrust_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/segmentationTreeThrust.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 1 ./;../../../Common WIN32 @@ -108,6 +108,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/segmentationTreeThrust_vs2019.vcxproj 
b/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/segmentationTreeThrust_vs2019.vcxproj index 591302fe..96fd3760 100644 --- a/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/segmentationTreeThrust_vs2019.vcxproj +++ b/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/segmentationTreeThrust_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/segmentationTreeThrust.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 1 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/segmentationTreeThrust_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/segmentationTreeThrust_vs2022.vcxproj index e82eceb4..18e37f1f 100644 --- a/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/segmentationTreeThrust_vs2022.vcxproj +++ b/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/segmentationTreeThrust_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/segmentationTreeThrust.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 1 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/shfl_scan/Makefile b/Samples/2_Concepts_and_Techniques/shfl_scan/Makefile index 256e10fd..4f210b3e 100644 --- 
a/Samples/2_Concepts_and_Techniques/shfl_scan/Makefile +++ b/Samples/2_Concepts_and_Techniques/shfl_scan/Makefile @@ -310,9 +310,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/2_Concepts_and_Techniques/shfl_scan/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/shfl_scan/NsightEclipse.xml index 8410c325..d6060063 100644 --- a/Samples/2_Concepts_and_Techniques/shfl_scan/NsightEclipse.xml +++ b/Samples/2_Concepts_and_Techniques/shfl_scan/NsightEclipse.xml @@ -7,18 +7,18 @@ -O3 - cudaMemset - cudaFree - cudaEventRecord - cudaMallocHost - cudaEventCreate - cudaEventElapsedTime - cudaEventSynchronize - cudaFreeHost - cudaMalloc cudaMemcpy - cudaGetDeviceProperties + cudaFree + cudaMallocHost + cudaEventSynchronize + cudaEventRecord + cudaFreeHost cudaGetDevice + cudaMemset + cudaMalloc + cudaEventElapsedTime + cudaGetDeviceProperties + cudaEventCreate whole @@ -66,6 +66,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/2_Concepts_and_Techniques/shfl_scan/README.md b/Samples/2_Concepts_and_Techniques/shfl_scan/README.md index f042bb79..5afaefea 100644 --- a/Samples/2_Concepts_and_Techniques/shfl_scan/README.md +++ b/Samples/2_Concepts_and_Techniques/shfl_scan/README.md @@ -10,7 +10,7 @@ Data-Parallel Algorithms, Performance Strategies ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 
](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l, aarch64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemset, cudaFree, cudaEventRecord, cudaMallocHost, cudaEventCreate, cudaEventElapsedTime, cudaEventSynchronize, cudaFreeHost, cudaMalloc, cudaMemcpy, cudaGetDeviceProperties, cudaGetDevice +cudaMemcpy, cudaFree, cudaMallocHost, cudaEventSynchronize, cudaEventRecord, cudaFreeHost, cudaGetDevice, cudaMemset, cudaMalloc, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate ## Dependencies needed to build/run [CPP11](../../../README.md#cpp11) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/shfl_scan/shfl_scan_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/shfl_scan/shfl_scan_vs2017.vcxproj index ee4c6348..9e801711 100644 --- a/Samples/2_Concepts_and_Techniques/shfl_scan/shfl_scan_vs2017.vcxproj +++ b/Samples/2_Concepts_and_Techniques/shfl_scan/shfl_scan_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/shfl_scan.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -108,6 +108,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/shfl_scan/shfl_scan_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/shfl_scan/shfl_scan_vs2019.vcxproj index c99b3fe4..72076c47 100644 --- a/Samples/2_Concepts_and_Techniques/shfl_scan/shfl_scan_vs2019.vcxproj +++ b/Samples/2_Concepts_and_Techniques/shfl_scan/shfl_scan_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/shfl_scan.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/shfl_scan/shfl_scan_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/shfl_scan/shfl_scan_vs2022.vcxproj index ec328ba6..3ad9db48 100644 --- a/Samples/2_Concepts_and_Techniques/shfl_scan/shfl_scan_vs2022.vcxproj +++ 
b/Samples/2_Concepts_and_Techniques/shfl_scan/shfl_scan_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/shfl_scan.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/sortingNetworks/Makefile b/Samples/2_Concepts_and_Techniques/sortingNetworks/Makefile index a2de7cb9..7d715f26 100644 --- a/Samples/2_Concepts_and_Techniques/sortingNetworks/Makefile +++ b/Samples/2_Concepts_and_Techniques/sortingNetworks/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/2_Concepts_and_Techniques/sortingNetworks/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/sortingNetworks/NsightEclipse.xml index bec0501a..3ef62721 100644 --- a/Samples/2_Concepts_and_Techniques/sortingNetworks/NsightEclipse.xml +++ b/Samples/2_Concepts_and_Techniques/sortingNetworks/NsightEclipse.xml @@ -4,9 +4,9 @@ sortingNetworks cudaMalloc - cudaFree cudaDeviceSynchronize cudaMemcpy + cudaFree whole @@ -46,6 +46,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/2_Concepts_and_Techniques/sortingNetworks/README.md b/Samples/2_Concepts_and_Techniques/sortingNetworks/README.md index 905e698f..707ed30d 100644 --- a/Samples/2_Concepts_and_Techniques/sortingNetworks/README.md +++ b/Samples/2_Concepts_and_Techniques/sortingNetworks/README.md @@ -10,7 +10,7 @@ Data-Parallel Algorithms ## 
Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMalloc, cudaFree, cudaDeviceSynchronize, cudaMemcpy +cudaMalloc, cudaDeviceSynchronize, cudaMemcpy, cudaFree ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/sortingNetworks/sortingNetworks_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/sortingNetworks/sortingNetworks_vs2017.vcxproj index 0012d67a..7c018efb 100644 --- a/Samples/2_Concepts_and_Techniques/sortingNetworks/sortingNetworks_vs2017.vcxproj +++ b/Samples/2_Concepts_and_Techniques/sortingNetworks/sortingNetworks_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/sortingNetworks.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -111,6 +111,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/sortingNetworks/sortingNetworks_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/sortingNetworks/sortingNetworks_vs2019.vcxproj index 90052760..3181de72 100644 --- a/Samples/2_Concepts_and_Techniques/sortingNetworks/sortingNetworks_vs2019.vcxproj +++ b/Samples/2_Concepts_and_Techniques/sortingNetworks/sortingNetworks_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/sortingNetworks.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/sortingNetworks/sortingNetworks_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/sortingNetworks/sortingNetworks_vs2022.vcxproj index 
626d389a..dc83a7b7 100644 --- a/Samples/2_Concepts_and_Techniques/sortingNetworks/sortingNetworks_vs2022.vcxproj +++ b/Samples/2_Concepts_and_Techniques/sortingNetworks/sortingNetworks_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/sortingNetworks.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/Makefile b/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/Makefile index c2e55c39..c672f707 100644 --- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/Makefile +++ b/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/Makefile @@ -285,9 +285,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/NsightEclipse.xml index 63511e06..8ca780e9 100644 --- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/NsightEclipse.xml +++ b/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/NsightEclipse.xml @@ -3,20 +3,20 @@ streamOrderedAllocation + cudaDeviceGetDefaultMemPool + cudaFreeAsync + cudaStreamCreateWithFlags cudaStreamDestroy - cudaEventRecord - cudaEventCreate - cudaEventElapsedTime - cudaEventSynchronize cudaDeviceGetAttribute cudaMallocAsync - cudaStreamCreateWithFlags - 
cudaDeviceGetDefaultMemPool cudaSetDevice - cudaMemPoolSetAttribute + cudaEventSynchronize + cudaEventRecord cudaStreamSynchronize + cudaMemPoolSetAttribute + cudaEventElapsedTime cudaMemcpyAsync - cudaFreeAsync + cudaEventCreate whole @@ -48,6 +48,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/README.md b/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/README.md index 2eaa6d1a..4af372ec 100644 --- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/README.md +++ b/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/README.md @@ -10,7 +10,7 @@ Performance Strategies ## Supported SM Architectures -[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaStreamDestroy, cudaEventRecord, cudaEventCreate, cudaEventElapsedTime, cudaEventSynchronize, cudaDeviceGetAttribute, cudaMallocAsync, cudaStreamCreateWithFlags, cudaDeviceGetDefaultMemPool, cudaSetDevice, cudaMemPoolSetAttribute, cudaStreamSynchronize, cudaMemcpyAsync, cudaFreeAsync 
+cudaDeviceGetDefaultMemPool, cudaFreeAsync, cudaStreamCreateWithFlags, cudaStreamDestroy, cudaDeviceGetAttribute, cudaMallocAsync, cudaSetDevice, cudaEventSynchronize, cudaEventRecord, cudaStreamSynchronize, cudaMemPoolSetAttribute, cudaEventElapsedTime, cudaMemcpyAsync, cudaEventCreate ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/streamOrderedAllocation_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/streamOrderedAllocation_vs2017.vcxproj index 376ea1fa..8f4dc7c0 100644 --- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/streamOrderedAllocation_vs2017.vcxproj +++ b/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/streamOrderedAllocation_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/streamOrderedAllocation.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/streamOrderedAllocation_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/streamOrderedAllocation_vs2019.vcxproj index f81e896c..9cd3baae 100644 --- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/streamOrderedAllocation_vs2019.vcxproj +++ b/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/streamOrderedAllocation_vs2019.vcxproj @@ -34,7 
+34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/streamOrderedAllocation.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/streamOrderedAllocation_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/streamOrderedAllocation_vs2022.vcxproj index b61e42f3..6f2d5040 100644 --- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/streamOrderedAllocation_vs2022.vcxproj +++ b/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/streamOrderedAllocation_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/streamOrderedAllocation.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/Makefile b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/Makefile index b71befbe..41845161 100644 --- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/Makefile +++ b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/Makefile @@ -305,9 +305,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 
80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/NsightEclipse.xml index d583eaaa..b357595e 100644 --- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/NsightEclipse.xml +++ b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/NsightEclipse.xml @@ -8,28 +8,28 @@ cuDeviceGetAttribute cuDeviceGet - cudaDeviceEnablePeerAccess - cudaMemPoolImportPointer - cudaOccupancyMaxActiveBlocksPerMultiprocessor - cudaMemPoolDestroy - cudaMallocAsync - cudaStreamCreateWithFlags - cudaDeviceCanAccessPeer - cudaMemcpyAsync - cudaStreamDestroy - cudaSetDevice - cudaGetDeviceProperties - cudaMemPoolSetAccess - cudaGetDeviceCount cudaDeviceGetAttribute - cudaMemPoolExportPointer cudaMemPoolImportFromShareableHandle - cudaMemPoolCreate - cudaGetLastError - cudaStreamSynchronize + cudaSetDevice + cudaMemPoolExportPointer cudaMemPoolGetAccess - cudaMemPoolExportToShareableHandle + cudaMemPoolDestroy + cudaMemPoolSetAccess + cudaMallocAsync + cudaMemPoolImportPointer + cudaGetDeviceCount + cudaMemcpyAsync + cudaDeviceCanAccessPeer cudaFreeAsync + cudaStreamCreateWithFlags + cudaStreamDestroy + cudaGetLastError + cudaMemPoolCreate + cudaMemPoolExportToShareableHandle + cudaStreamSynchronize + cudaDeviceEnablePeerAccess + cudaOccupancyMaxActiveBlocksPerMultiprocessor + cudaGetDeviceProperties whole @@ -63,6 +63,7 @@ sm80 sm86 sm87 + sm90 ../../../Common/helper_multiprocess.cpp ../../../Common/helper_multiprocess.h diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/README.md b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/README.md index 73a90c5e..b3eff96a 100644 --- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/README.md +++ 
b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/README.md @@ -10,7 +10,7 @@ Performance Strategies ## Supported SM Architectures -[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -26,11 +26,11 @@ x86_64 cuDeviceGetAttribute, cuDeviceGet ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaDeviceEnablePeerAccess, cudaMemPoolImportPointer, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaMemPoolDestroy, cudaMallocAsync, cudaStreamCreateWithFlags, cudaDeviceCanAccessPeer, cudaMemcpyAsync, cudaStreamDestroy, cudaSetDevice, cudaGetDeviceProperties, cudaMemPoolSetAccess, cudaGetDeviceCount, cudaDeviceGetAttribute, cudaMemPoolExportPointer, cudaMemPoolImportFromShareableHandle, cudaMemPoolCreate, cudaGetLastError, cudaStreamSynchronize, cudaMemPoolGetAccess, cudaMemPoolExportToShareableHandle, cudaFreeAsync +cudaDeviceGetAttribute, cudaMemPoolImportFromShareableHandle, cudaSetDevice, cudaMemPoolExportPointer, cudaMemPoolGetAccess, cudaMemPoolDestroy, cudaMemPoolSetAccess, cudaMallocAsync, cudaMemPoolImportPointer, cudaGetDeviceCount, cudaMemcpyAsync, cudaDeviceCanAccessPeer, cudaFreeAsync, cudaStreamCreateWithFlags, 
cudaStreamDestroy, cudaGetLastError, cudaMemPoolCreate, cudaMemPoolExportToShareableHandle, cudaStreamSynchronize, cudaDeviceEnablePeerAccess, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaGetDeviceProperties ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/Makefile b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/Makefile index 792db21e..75bf6386 100644 --- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/Makefile +++ b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/Makefile @@ -285,9 +285,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/NsightEclipse.xml index 3e51c0ff..f6444442 100644 --- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/NsightEclipse.xml +++ b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/NsightEclipse.xml @@ -6,21 +6,21 @@ --std=c++11 - cudaStreamWaitEvent - cudaStreamDestroy - cudaMemPoolSetAccess - cudaEventRecord - cudaEventCreate - cudaGetDeviceCount - cudaMallocAsync - cudaDeviceGetAttribute - cudaStreamCreateWithFlags - cudaDeviceCanAccessPeer cudaDeviceGetDefaultMemPool - cudaSetDevice - cudaStreamSynchronize - cudaMemcpyAsync cudaFreeAsync + cudaStreamCreateWithFlags + cudaMemPoolSetAccess + cudaStreamDestroy + cudaDeviceGetAttribute + cudaMallocAsync + cudaSetDevice + 
cudaGetDeviceCount + cudaEventRecord + cudaStreamSynchronize + cudaStreamWaitEvent + cudaMemcpyAsync + cudaDeviceCanAccessPeer + cudaEventCreate whole @@ -52,6 +52,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/README.md b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/README.md index 9ef3fa17..0b2a83d9 100644 --- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/README.md +++ b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/README.md @@ -10,7 +10,7 @@ Performance Strategies ## Supported SM Architectures -[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaStreamWaitEvent, cudaStreamDestroy, cudaMemPoolSetAccess, cudaEventRecord, cudaEventCreate, cudaGetDeviceCount, cudaMallocAsync, cudaDeviceGetAttribute, cudaStreamCreateWithFlags, cudaDeviceCanAccessPeer, cudaDeviceGetDefaultMemPool, cudaSetDevice, cudaStreamSynchronize, cudaMemcpyAsync, cudaFreeAsync +cudaDeviceGetDefaultMemPool, cudaFreeAsync, 
cudaStreamCreateWithFlags, cudaMemPoolSetAccess, cudaStreamDestroy, cudaDeviceGetAttribute, cudaMallocAsync, cudaSetDevice, cudaGetDeviceCount, cudaEventRecord, cudaStreamSynchronize, cudaStreamWaitEvent, cudaMemcpyAsync, cudaDeviceCanAccessPeer, cudaEventCreate ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/streamOrderedAllocationP2P_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/streamOrderedAllocationP2P_vs2017.vcxproj index 5b9ec320..5ad486fc 100644 --- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/streamOrderedAllocationP2P_vs2017.vcxproj +++ b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/streamOrderedAllocationP2P_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/streamOrderedAllocationP2P.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/streamOrderedAllocationP2P_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/streamOrderedAllocationP2P_vs2019.vcxproj index 79b1ce85..a7248a6e 100644 --- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/streamOrderedAllocationP2P_vs2019.vcxproj +++ 
b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/streamOrderedAllocationP2P_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/streamOrderedAllocationP2P.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/streamOrderedAllocationP2P_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/streamOrderedAllocationP2P_vs2022.vcxproj index 6b468f18..0ea72145 100644 --- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/streamOrderedAllocationP2P_vs2022.vcxproj +++ b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/streamOrderedAllocationP2P_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/streamOrderedAllocationP2P.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/threadFenceReduction/Makefile b/Samples/2_Concepts_and_Techniques/threadFenceReduction/Makefile index e3f4586b..37b8a9b4 100644 --- a/Samples/2_Concepts_and_Techniques/threadFenceReduction/Makefile +++ b/Samples/2_Concepts_and_Techniques/threadFenceReduction/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments 
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/2_Concepts_and_Techniques/threadFenceReduction/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/threadFenceReduction/NsightEclipse.xml index e997b187..8436fef3 100644 --- a/Samples/2_Concepts_and_Techniques/threadFenceReduction/NsightEclipse.xml +++ b/Samples/2_Concepts_and_Techniques/threadFenceReduction/NsightEclipse.xml @@ -3,10 +3,10 @@ threadFenceReduction + cudaMemcpy cudaFree cudaDeviceSynchronize cudaMalloc - cudaMemcpy cudaGetDeviceProperties @@ -48,6 +48,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/2_Concepts_and_Techniques/threadFenceReduction/README.md b/Samples/2_Concepts_and_Techniques/threadFenceReduction/README.md index 4ce80f5c..0156a5f1 100644 --- a/Samples/2_Concepts_and_Techniques/threadFenceReduction/README.md +++ b/Samples/2_Concepts_and_Techniques/threadFenceReduction/README.md @@ -10,7 +10,7 @@ Cooperative Groups, Data-Parallel Algorithms, Performance Strategies ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 
](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaMemcpy, cudaGetDeviceProperties +cudaMemcpy, cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaGetDeviceProperties ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/threadFenceReduction/threadFenceReduction_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/threadFenceReduction/threadFenceReduction_vs2017.vcxproj index 4b0cf87e..0d547054 100644 --- a/Samples/2_Concepts_and_Techniques/threadFenceReduction/threadFenceReduction_vs2017.vcxproj +++ b/Samples/2_Concepts_and_Techniques/threadFenceReduction/threadFenceReduction_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/threadFenceReduction.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -108,6 +108,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/threadFenceReduction/threadFenceReduction_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/threadFenceReduction/threadFenceReduction_vs2019.vcxproj index b15f148f..ca4df79f 100644 --- a/Samples/2_Concepts_and_Techniques/threadFenceReduction/threadFenceReduction_vs2019.vcxproj +++ b/Samples/2_Concepts_and_Techniques/threadFenceReduction/threadFenceReduction_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/threadFenceReduction.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git 
a/Samples/2_Concepts_and_Techniques/threadFenceReduction/threadFenceReduction_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/threadFenceReduction/threadFenceReduction_vs2022.vcxproj index 157d11dc..2e5fa2f9 100644 --- a/Samples/2_Concepts_and_Techniques/threadFenceReduction/threadFenceReduction_vs2022.vcxproj +++ b/Samples/2_Concepts_and_Techniques/threadFenceReduction/threadFenceReduction_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/threadFenceReduction.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/threadMigration/Makefile b/Samples/2_Concepts_and_Techniques/threadMigration/Makefile index 3acd8454..6c4d542d 100644 --- a/Samples/2_Concepts_and_Techniques/threadMigration/Makefile +++ b/Samples/2_Concepts_and_Techniques/threadMigration/Makefile @@ -283,9 +283,9 @@ FATBIN_FILE := threadMigration_kernel${TARGET_SIZE}.fatbin # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(GENCODE_FLAGS),) diff --git a/Samples/2_Concepts_and_Techniques/threadMigration/README.md b/Samples/2_Concepts_and_Techniques/threadMigration/README.md index 22a095da..801305ce 100644 --- a/Samples/2_Concepts_and_Techniques/threadMigration/README.md +++ b/Samples/2_Concepts_and_Techniques/threadMigration/README.md @@ -10,7 +10,7 @@ CUDA Driver API ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 
](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html) -cuModuleGetFunction, cuModuleLoadData, cuDeviceGetCount, cuCtxCreate, cuCtxPopCurrent, cuDeviceGetName, cuLaunchKernel, cuMemAlloc, cuMemcpyDtoH, cuModuleUnload, cuCtxPushCurrent, cuDeviceGet, cuMemFree, cuInit, cuCtxDestroy, cuDeviceGetAttribute +cuMemcpyDtoH, cuLaunchKernel, cuModuleLoadData, cuDeviceGetName, cuDeviceGet, cuDeviceGetAttribute, cuMemAlloc, cuMemFree, cuCtxDestroy, cuCtxPopCurrent, cuModuleUnload, cuDeviceGetCount, cuModuleGetFunction, cuCtxCreate, cuCtxPushCurrent, cuInit ## Prerequisites -Download and install the [CUDA 
Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/2_Concepts_and_Techniques/threadMigration/threadMigration_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/threadMigration/threadMigration_vs2017.vcxproj index 8a38efdd..ba53a46a 100644 --- a/Samples/2_Concepts_and_Techniques/threadMigration/threadMigration_vs2017.vcxproj +++ b/Samples/2_Concepts_and_Techniques/threadMigration/threadMigration_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/threadMigration.exe - compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -111,6 +111,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/threadMigration/threadMigration_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/threadMigration/threadMigration_vs2019.vcxproj index 05df36d8..6f0f8a90 100644 --- a/Samples/2_Concepts_and_Techniques/threadMigration/threadMigration_vs2019.vcxproj +++ b/Samples/2_Concepts_and_Techniques/threadMigration/threadMigration_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/threadMigration.exe - compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + 
compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/2_Concepts_and_Techniques/threadMigration/threadMigration_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/threadMigration/threadMigration_vs2022.vcxproj index cb4ebcc8..b81a2b07 100644 --- a/Samples/2_Concepts_and_Techniques/threadMigration/threadMigration_vs2022.vcxproj +++ b/Samples/2_Concepts_and_Techniques/threadMigration/threadMigration_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/threadMigration.exe - compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/3_CUDA_Features/StreamPriorities/Makefile b/Samples/3_CUDA_Features/StreamPriorities/Makefile index 9b1734f5..6faeb198 100644 --- a/Samples/3_CUDA_Features/StreamPriorities/Makefile +++ b/Samples/3_CUDA_Features/StreamPriorities/Makefile @@ -297,9 +297,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/3_CUDA_Features/StreamPriorities/NsightEclipse.xml b/Samples/3_CUDA_Features/StreamPriorities/NsightEclipse.xml index b3e3aab7..0cb59d58 100644 --- 
a/Samples/3_CUDA_Features/StreamPriorities/NsightEclipse.xml +++ b/Samples/3_CUDA_Features/StreamPriorities/NsightEclipse.xml @@ -3,15 +3,15 @@ StreamPriorities - cudaDeviceGetStreamPriorityRange - cudaEventRecord - cudaEventCreate - cudaEventElapsedTime - cudaEventSynchronize - cudaMalloc - cudaStreamCreateWithPriority cudaMemcpy + cudaStreamCreateWithPriority + cudaDeviceGetStreamPriorityRange + cudaEventSynchronize + cudaEventRecord + cudaMalloc + cudaEventElapsedTime cudaGetDeviceProperties + cudaEventCreate whole @@ -54,6 +54,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/3_CUDA_Features/StreamPriorities/README.md b/Samples/3_CUDA_Features/StreamPriorities/README.md index 52150773..0b616b49 100644 --- a/Samples/3_CUDA_Features/StreamPriorities/README.md +++ b/Samples/3_CUDA_Features/StreamPriorities/README.md @@ -10,7 +10,7 @@ CUDA Streams and Events ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 
](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaDeviceGetStreamPriorityRange, cudaEventRecord, cudaEventCreate, cudaEventElapsedTime, cudaEventSynchronize, cudaMalloc, cudaStreamCreateWithPriority, cudaMemcpy, cudaGetDeviceProperties +cudaMemcpy, cudaStreamCreateWithPriority, cudaDeviceGetStreamPriorityRange, cudaEventSynchronize, cudaEventRecord, cudaMalloc, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate ## Dependencies needed to build/run [Stream-Priorities](../../../README.md#stream-priorities) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/3_CUDA_Features/bf16TensorCoreGemm/Makefile b/Samples/3_CUDA_Features/bf16TensorCoreGemm/Makefile index 3d8b8dec..29fdcbae 100644 --- a/Samples/3_CUDA_Features/bf16TensorCoreGemm/Makefile +++ b/Samples/3_CUDA_Features/bf16TensorCoreGemm/Makefile @@ -322,9 +322,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 80 86 87 +SMS ?= 80 86 87 90 else -SMS ?= 80 86 +SMS ?= 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/3_CUDA_Features/bf16TensorCoreGemm/NsightEclipse.xml b/Samples/3_CUDA_Features/bf16TensorCoreGemm/NsightEclipse.xml index a9313696..1eac8e5c 100644 --- a/Samples/3_CUDA_Features/bf16TensorCoreGemm/NsightEclipse.xml +++ b/Samples/3_CUDA_Features/bf16TensorCoreGemm/NsightEclipse.xml @@ -6,18 +6,18 @@ --std=c++11 - cudaMemset - cudaFree - cudaEventRecord - cudaEventCreate - cudaFuncSetAttribute - cudaEventElapsedTime - cudaEventSynchronize - cudaMalloc - cudaGetLastError cudaMemcpy + cudaFree cudaGetErrorString + cudaGetLastError + cudaEventSynchronize + cudaFuncSetAttribute + cudaEventRecord + cudaMemset + cudaMalloc + cudaEventElapsedTime cudaGetDeviceProperties + cudaEventCreate whole @@ -52,6 +52,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/3_CUDA_Features/bf16TensorCoreGemm/README.md b/Samples/3_CUDA_Features/bf16TensorCoreGemm/README.md index 7a8b729b..30f4eece 100644 --- a/Samples/3_CUDA_Features/bf16TensorCoreGemm/README.md +++ b/Samples/3_CUDA_Features/bf16TensorCoreGemm/README.md @@ -10,7 +10,7 @@ Matrix Multiply, WMMA, Tensor Cores ## Supported SM Architectures -[SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ 
x86_64, ppc64le, aarch64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemset, cudaFree, cudaEventRecord, cudaEventCreate, cudaFuncSetAttribute, cudaEventElapsedTime, cudaEventSynchronize, cudaMalloc, cudaGetLastError, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties +cudaMemcpy, cudaFree, cudaGetErrorString, cudaGetLastError, cudaEventSynchronize, cudaFuncSetAttribute, cudaEventRecord, cudaMemset, cudaMalloc, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate ## Dependencies needed to build/run [CPP11](../../../README.md#cpp11) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/3_CUDA_Features/bf16TensorCoreGemm/bf16TensorCoreGemm_vs2017.vcxproj b/Samples/3_CUDA_Features/bf16TensorCoreGemm/bf16TensorCoreGemm_vs2017.vcxproj index bfab31dc..b8cb9fb2 100644 --- a/Samples/3_CUDA_Features/bf16TensorCoreGemm/bf16TensorCoreGemm_vs2017.vcxproj +++ b/Samples/3_CUDA_Features/bf16TensorCoreGemm/bf16TensorCoreGemm_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/bf16TensorCoreGemm.exe - compute_80,sm_80;compute_86,sm_86; + compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/3_CUDA_Features/bf16TensorCoreGemm/bf16TensorCoreGemm_vs2019.vcxproj b/Samples/3_CUDA_Features/bf16TensorCoreGemm/bf16TensorCoreGemm_vs2019.vcxproj index 4240024c..c2c1f920 100644 --- a/Samples/3_CUDA_Features/bf16TensorCoreGemm/bf16TensorCoreGemm_vs2019.vcxproj +++ b/Samples/3_CUDA_Features/bf16TensorCoreGemm/bf16TensorCoreGemm_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 
+63,7 @@ $(OutDir)/bf16TensorCoreGemm.exe - compute_80,sm_80;compute_86,sm_86; + compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/3_CUDA_Features/bf16TensorCoreGemm/bf16TensorCoreGemm_vs2022.vcxproj b/Samples/3_CUDA_Features/bf16TensorCoreGemm/bf16TensorCoreGemm_vs2022.vcxproj index ecbfc6d5..774b45bf 100644 --- a/Samples/3_CUDA_Features/bf16TensorCoreGemm/bf16TensorCoreGemm_vs2022.vcxproj +++ b/Samples/3_CUDA_Features/bf16TensorCoreGemm/bf16TensorCoreGemm_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/bf16TensorCoreGemm.exe - compute_80,sm_80;compute_86,sm_86; + compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/3_CUDA_Features/binaryPartitionCG/Makefile b/Samples/3_CUDA_Features/binaryPartitionCG/Makefile index d1bdbc6d..ace0b3ae 100644 --- a/Samples/3_CUDA_Features/binaryPartitionCG/Makefile +++ b/Samples/3_CUDA_Features/binaryPartitionCG/Makefile @@ -310,9 +310,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/3_CUDA_Features/binaryPartitionCG/NsightEclipse.xml b/Samples/3_CUDA_Features/binaryPartitionCG/NsightEclipse.xml index b611d7c1..96982217 100644 --- a/Samples/3_CUDA_Features/binaryPartitionCG/NsightEclipse.xml +++ b/Samples/3_CUDA_Features/binaryPartitionCG/NsightEclipse.xml @@ -6,15 +6,15 @@ --std=c++11 + cudaStreamCreateWithFlags cudaFree cudaMallocHost - cudaOccupancyMaxPotentialBlockSize - cudaMemsetAsync cudaFreeHost - cudaMalloc - cudaStreamCreateWithFlags cudaStreamSynchronize + cudaMalloc + cudaMemsetAsync cudaMemcpyAsync + cudaOccupancyMaxPotentialBlockSize whole @@ -54,6 +54,7 @@ 
sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/3_CUDA_Features/binaryPartitionCG/README.md b/Samples/3_CUDA_Features/binaryPartitionCG/README.md index c40ff6bd..d2c29682 100644 --- a/Samples/3_CUDA_Features/binaryPartitionCG/README.md +++ b/Samples/3_CUDA_Features/binaryPartitionCG/README.md @@ -10,7 +10,7 @@ Cooperative Groups ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaMallocHost, cudaOccupancyMaxPotentialBlockSize, cudaMemsetAsync, cudaFreeHost, cudaMalloc, 
cudaStreamCreateWithFlags, cudaStreamSynchronize, cudaMemcpyAsync +cudaStreamCreateWithFlags, cudaFree, cudaMallocHost, cudaFreeHost, cudaStreamSynchronize, cudaMalloc, cudaMemsetAsync, cudaMemcpyAsync, cudaOccupancyMaxPotentialBlockSize ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/3_CUDA_Features/binaryPartitionCG/binaryPartitionCG_vs2017.vcxproj b/Samples/3_CUDA_Features/binaryPartitionCG/binaryPartitionCG_vs2017.vcxproj index cd3fccf0..be9569bb 100644 --- a/Samples/3_CUDA_Features/binaryPartitionCG/binaryPartitionCG_vs2017.vcxproj +++ b/Samples/3_CUDA_Features/binaryPartitionCG/binaryPartitionCG_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/binaryPartitionCG.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/3_CUDA_Features/binaryPartitionCG/binaryPartitionCG_vs2019.vcxproj b/Samples/3_CUDA_Features/binaryPartitionCG/binaryPartitionCG_vs2019.vcxproj index 5b7320ce..c3c28362 100644 --- a/Samples/3_CUDA_Features/binaryPartitionCG/binaryPartitionCG_vs2019.vcxproj +++ b/Samples/3_CUDA_Features/binaryPartitionCG/binaryPartitionCG_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/binaryPartitionCG.exe - 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/3_CUDA_Features/binaryPartitionCG/binaryPartitionCG_vs2022.vcxproj b/Samples/3_CUDA_Features/binaryPartitionCG/binaryPartitionCG_vs2022.vcxproj index df9ea16f..594d4bd3 100644 --- a/Samples/3_CUDA_Features/binaryPartitionCG/binaryPartitionCG_vs2022.vcxproj +++ b/Samples/3_CUDA_Features/binaryPartitionCG/binaryPartitionCG_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/binaryPartitionCG.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/3_CUDA_Features/bindlessTexture/Makefile b/Samples/3_CUDA_Features/bindlessTexture/Makefile index 4310ee3e..9e9c3369 100644 --- a/Samples/3_CUDA_Features/bindlessTexture/Makefile +++ b/Samples/3_CUDA_Features/bindlessTexture/Makefile @@ -299,9 +299,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/3_CUDA_Features/bindlessTexture/NsightEclipse.xml b/Samples/3_CUDA_Features/bindlessTexture/NsightEclipse.xml index 
247f1255..25b63ad8 100644 --- a/Samples/3_CUDA_Features/bindlessTexture/NsightEclipse.xml +++ b/Samples/3_CUDA_Features/bindlessTexture/NsightEclipse.xml @@ -3,28 +3,28 @@ bindlessTexture - cudaGraphicsMapResources - cudaGetMipmappedArrayLevel - cudaGraphicsResourceGetMappedPointer - cudaArrayGetInfo cudaMemcpy - cudaFreeMipmappedArray + cudaGetMipmappedArrayLevel + cudaGraphicsMapResources cudaDestroySurfaceObject - cudaPitchedPtr - cudaMalloc - cudaGraphicsUnregisterResource - cudaDeviceSynchronize - cudaDestroyTextureObject - cudaGetLastError - cudaFree - cudaFreeArray - cudaGraphicsGLRegisterBuffer cudaExtent + cudaDeviceSynchronize cudaCreateSurfaceObject cudaMallocMipmappedArray + cudaPitchedPtr + cudaGraphicsResourceGetMappedPointer cudaCreateTextureObject - cudaMallocArray cudaGraphicsUnmapResources + cudaMallocArray + cudaFreeArray + cudaArrayGetInfo + cudaGetLastError + cudaDestroyTextureObject + cudaGraphicsGLRegisterBuffer + cudaFreeMipmappedArray + cudaFree + cudaGraphicsUnregisterResource + cudaMalloc whole @@ -85,6 +85,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/3_CUDA_Features/bindlessTexture/README.md b/Samples/3_CUDA_Features/bindlessTexture/README.md index 4047f08a..cf14ba1a 100644 --- a/Samples/3_CUDA_Features/bindlessTexture/README.md +++ b/Samples/3_CUDA_Features/bindlessTexture/README.md @@ -10,7 +10,7 @@ Graphics Interop, Texture ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 
](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaGraphicsMapResources, cudaGetMipmappedArrayLevel, cudaGraphicsResourceGetMappedPointer, cudaArrayGetInfo, cudaMemcpy, cudaFreeMipmappedArray, cudaDestroySurfaceObject, cudaPitchedPtr, cudaMalloc, cudaGraphicsUnregisterResource, cudaDeviceSynchronize, cudaDestroyTextureObject, cudaGetLastError, cudaFree, cudaFreeArray, cudaGraphicsGLRegisterBuffer, cudaExtent, cudaCreateSurfaceObject, cudaMallocMipmappedArray, cudaCreateTextureObject, cudaMallocArray, cudaGraphicsUnmapResources +cudaMemcpy, cudaGetMipmappedArrayLevel, cudaGraphicsMapResources, cudaDestroySurfaceObject, cudaExtent, cudaDeviceSynchronize, cudaCreateSurfaceObject, cudaMallocMipmappedArray, cudaPitchedPtr, cudaGraphicsResourceGetMappedPointer, cudaCreateTextureObject, cudaGraphicsUnmapResources, cudaMallocArray, cudaFreeArray, cudaArrayGetInfo, cudaGetLastError, cudaDestroyTextureObject, cudaGraphicsGLRegisterBuffer, cudaFreeMipmappedArray, cudaFree, cudaGraphicsUnregisterResource, cudaMalloc ## Dependencies needed to build/run [X11](../../../README.md#x11), 
[GL](../../../README.md#gl) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/3_CUDA_Features/bindlessTexture/bindlessTexture_vs2017.vcxproj b/Samples/3_CUDA_Features/bindlessTexture/bindlessTexture_vs2017.vcxproj index c7434315..bcc1990a 100644 --- a/Samples/3_CUDA_Features/bindlessTexture/bindlessTexture_vs2017.vcxproj +++ b/Samples/3_CUDA_Features/bindlessTexture/bindlessTexture_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/bindlessTexture.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -118,6 +118,6 @@ - + diff --git a/Samples/3_CUDA_Features/bindlessTexture/bindlessTexture_vs2019.vcxproj b/Samples/3_CUDA_Features/bindlessTexture/bindlessTexture_vs2019.vcxproj index d18b1ac4..110d990f 100644 --- a/Samples/3_CUDA_Features/bindlessTexture/bindlessTexture_vs2019.vcxproj +++ b/Samples/3_CUDA_Features/bindlessTexture/bindlessTexture_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/bindlessTexture.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -114,6 +114,6 @@ - + diff --git a/Samples/3_CUDA_Features/bindlessTexture/bindlessTexture_vs2022.vcxproj b/Samples/3_CUDA_Features/bindlessTexture/bindlessTexture_vs2022.vcxproj index e08edee0..f9bcc8ae 100644 --- a/Samples/3_CUDA_Features/bindlessTexture/bindlessTexture_vs2022.vcxproj +++ b/Samples/3_CUDA_Features/bindlessTexture/bindlessTexture_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/bindlessTexture.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -114,6 +114,6 @@ - + diff --git a/Samples/3_CUDA_Features/bindlessTexture/findgllib.mk b/Samples/3_CUDA_Features/bindlessTexture/findgllib.mk index f0a5c551..998fcf0f 100644 --- a/Samples/3_CUDA_Features/bindlessTexture/findgllib.mk +++ b/Samples/3_CUDA_Features/bindlessTexture/findgllib.mk @@ -53,11 +53,12 @@ endif ifeq ("$(TARGET_OS)","linux") # $(info) >> findgllib.mk -> LINUX path <<<) # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside - UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) - FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) - RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) - CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) + UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) 
+ FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) + RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) + CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux") DFLT_PATH ?= /usr/lib endif endif + ifeq ("$(SUSE)","0") GLPATH ?= /usr/X11R6/lib64 GLLINK ?= -L/usr/X11R6/lib64 DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(FEDORA)","0") + else GLPATH ?= /usr/lib64/nvidia GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif - ifeq ("$(RHEL)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(CENTOS)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - + # find libGL, libGLU GLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so -print 2>/dev/null) GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null) diff --git a/Samples/3_CUDA_Features/cdpAdvancedQuicksort/Makefile b/Samples/3_CUDA_Features/cdpAdvancedQuicksort/Makefile index 85498be3..5eab1414 100644 --- a/Samples/3_CUDA_Features/cdpAdvancedQuicksort/Makefile +++ b/Samples/3_CUDA_Features/cdpAdvancedQuicksort/Makefile @@ -310,9 +310,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 61 70 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/3_CUDA_Features/cdpAdvancedQuicksort/NsightEclipse.xml b/Samples/3_CUDA_Features/cdpAdvancedQuicksort/NsightEclipse.xml index 714aab28..383410d3 100644 --- 
a/Samples/3_CUDA_Features/cdpAdvancedQuicksort/NsightEclipse.xml +++ b/Samples/3_CUDA_Features/cdpAdvancedQuicksort/NsightEclipse.xml @@ -8,20 +8,20 @@ --std=c++14 - cudaMemset - cudaFree - cudaEventRecord - cudaEventCreate - cudaEventElapsedTime - cudaDeviceSynchronize - cudaPeekAtLastError - cudaMalloc cudaStreamCreateWithFlags - cudaGetLastError - cudaMemcpyAsync cudaMemcpy + cudaMemcpyAsync + cudaFree cudaGetErrorString + cudaGetLastError + cudaPeekAtLastError + cudaDeviceSynchronize + cudaEventRecord + cudaMemset + cudaMalloc + cudaEventElapsedTime cudaGetDeviceProperties + cudaEventCreate @@ -63,6 +63,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/3_CUDA_Features/cdpAdvancedQuicksort/README.md b/Samples/3_CUDA_Features/cdpAdvancedQuicksort/README.md index b3f33419..1314b89b 100644 --- a/Samples/3_CUDA_Features/cdpAdvancedQuicksort/README.md +++ b/Samples/3_CUDA_Features/cdpAdvancedQuicksort/README.md @@ -10,7 +10,7 @@ Cooperative Groups, CUDA Dynamic Parallelism ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 
](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemset, cudaFree, cudaEventRecord, cudaEventCreate, cudaEventElapsedTime, cudaDeviceSynchronize, cudaPeekAtLastError, cudaMalloc, cudaStreamCreateWithFlags, cudaGetLastError, cudaMemcpyAsync, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties +cudaStreamCreateWithFlags, cudaMemcpy, cudaMemcpyAsync, cudaFree, cudaGetErrorString, cudaGetLastError, cudaPeekAtLastError, cudaDeviceSynchronize, cudaEventRecord, cudaMemset, cudaMalloc, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate ## Dependencies needed to build/run [CDP](../../../README.md#cdp) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/3_CUDA_Features/cdpAdvancedQuicksort/cdpAdvancedQuicksort_vs2017.vcxproj b/Samples/3_CUDA_Features/cdpAdvancedQuicksort/cdpAdvancedQuicksort_vs2017.vcxproj index 6244cf4f..5c76d1c7 100644 --- a/Samples/3_CUDA_Features/cdpAdvancedQuicksort/cdpAdvancedQuicksort_vs2017.vcxproj +++ b/Samples/3_CUDA_Features/cdpAdvancedQuicksort/cdpAdvancedQuicksort_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/cdpAdvancedQuicksort.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -109,6 +109,6 @@ - + diff --git a/Samples/3_CUDA_Features/cdpAdvancedQuicksort/cdpAdvancedQuicksort_vs2019.vcxproj b/Samples/3_CUDA_Features/cdpAdvancedQuicksort/cdpAdvancedQuicksort_vs2019.vcxproj index 68071f8a..4779bb51 100644 --- a/Samples/3_CUDA_Features/cdpAdvancedQuicksort/cdpAdvancedQuicksort_vs2019.vcxproj +++ b/Samples/3_CUDA_Features/cdpAdvancedQuicksort/cdpAdvancedQuicksort_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/cdpAdvancedQuicksort.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -105,6 +105,6 @@ - + diff --git a/Samples/3_CUDA_Features/cdpAdvancedQuicksort/cdpAdvancedQuicksort_vs2022.vcxproj 
b/Samples/3_CUDA_Features/cdpAdvancedQuicksort/cdpAdvancedQuicksort_vs2022.vcxproj index 462ed63c..39da5407 100644 --- a/Samples/3_CUDA_Features/cdpAdvancedQuicksort/cdpAdvancedQuicksort_vs2022.vcxproj +++ b/Samples/3_CUDA_Features/cdpAdvancedQuicksort/cdpAdvancedQuicksort_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/cdpAdvancedQuicksort.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -105,6 +105,6 @@ - + diff --git a/Samples/3_CUDA_Features/cdpBezierTessellation/Makefile b/Samples/3_CUDA_Features/cdpBezierTessellation/Makefile index 4f89e84d..23ef29e8 100644 --- a/Samples/3_CUDA_Features/cdpBezierTessellation/Makefile +++ b/Samples/3_CUDA_Features/cdpBezierTessellation/Makefile @@ -285,9 +285,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 61 70 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/3_CUDA_Features/cdpBezierTessellation/NsightEclipse.xml b/Samples/3_CUDA_Features/cdpBezierTessellation/NsightEclipse.xml index 5566265e..26392f7b 100644 --- a/Samples/3_CUDA_Features/cdpBezierTessellation/NsightEclipse.xml +++ b/Samples/3_CUDA_Features/cdpBezierTessellation/NsightEclipse.xml @@ -6,10 +6,10 @@ -dc + cudaMemcpy cudaFree cudaGetDeviceCount cudaMalloc - cudaMemcpy cudaGetDeviceProperties @@ -50,6 +50,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/3_CUDA_Features/cdpBezierTessellation/README.md b/Samples/3_CUDA_Features/cdpBezierTessellation/README.md index 30ba3375..bb2d6e63 
100644 --- a/Samples/3_CUDA_Features/cdpBezierTessellation/README.md +++ b/Samples/3_CUDA_Features/cdpBezierTessellation/README.md @@ -10,7 +10,7 @@ CUDA Dynamic Parallelism ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaGetDeviceCount, cudaMalloc, cudaMemcpy, cudaGetDeviceProperties +cudaMemcpy, cudaFree, cudaGetDeviceCount, cudaMalloc, cudaGetDeviceProperties ## Dependencies needed to build/run [CDP](../../../README.md#cdp) ## Prerequisites -Download and install the [CUDA 
Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/3_CUDA_Features/cdpBezierTessellation/cdpBezierTessellation_vs2017.vcxproj b/Samples/3_CUDA_Features/cdpBezierTessellation/cdpBezierTessellation_vs2017.vcxproj index 6436a8ad..e7733bf8 100644 --- a/Samples/3_CUDA_Features/cdpBezierTessellation/cdpBezierTessellation_vs2017.vcxproj +++ b/Samples/3_CUDA_Features/cdpBezierTessellation/cdpBezierTessellation_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/cdpBezierTessellation.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -108,6 +108,6 @@ - + diff --git a/Samples/3_CUDA_Features/cdpBezierTessellation/cdpBezierTessellation_vs2019.vcxproj b/Samples/3_CUDA_Features/cdpBezierTessellation/cdpBezierTessellation_vs2019.vcxproj index 1452f331..aa2fefe0 100644 --- a/Samples/3_CUDA_Features/cdpBezierTessellation/cdpBezierTessellation_vs2019.vcxproj +++ b/Samples/3_CUDA_Features/cdpBezierTessellation/cdpBezierTessellation_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/cdpBezierTessellation.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/3_CUDA_Features/cdpBezierTessellation/cdpBezierTessellation_vs2022.vcxproj b/Samples/3_CUDA_Features/cdpBezierTessellation/cdpBezierTessellation_vs2022.vcxproj index b1eb0d98..577c5e19 100644 --- a/Samples/3_CUDA_Features/cdpBezierTessellation/cdpBezierTessellation_vs2022.vcxproj +++ b/Samples/3_CUDA_Features/cdpBezierTessellation/cdpBezierTessellation_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/cdpBezierTessellation.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/3_CUDA_Features/cdpQuadtree/Makefile b/Samples/3_CUDA_Features/cdpQuadtree/Makefile index fc39f0e0..9b4e08f8 100644 --- a/Samples/3_CUDA_Features/cdpQuadtree/Makefile +++ b/Samples/3_CUDA_Features/cdpQuadtree/Makefile @@ -310,9 +310,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 61 70 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/3_CUDA_Features/cdpQuadtree/NsightEclipse.xml b/Samples/3_CUDA_Features/cdpQuadtree/NsightEclipse.xml index 63b02230..6d867f7f 100644 --- a/Samples/3_CUDA_Features/cdpQuadtree/NsightEclipse.xml +++ b/Samples/3_CUDA_Features/cdpQuadtree/NsightEclipse.xml @@ -7,11 +7,11 @@ 
--std=c++14 - cudaDeviceSetLimit - cudaFree - cudaMalloc - cudaGetLastError cudaMemcpy + cudaFree + cudaGetLastError + cudaDeviceSetLimit + cudaMalloc cudaGetDeviceProperties @@ -54,6 +54,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/3_CUDA_Features/cdpQuadtree/README.md b/Samples/3_CUDA_Features/cdpQuadtree/README.md index a170e47e..dc96c3c5 100644 --- a/Samples/3_CUDA_Features/cdpQuadtree/README.md +++ b/Samples/3_CUDA_Features/cdpQuadtree/README.md @@ -10,7 +10,7 @@ Cooperative Groups, CUDA Dynamic Parallelism ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### 
[CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaDeviceSetLimit, cudaFree, cudaMalloc, cudaGetLastError, cudaMemcpy, cudaGetDeviceProperties +cudaMemcpy, cudaFree, cudaGetLastError, cudaDeviceSetLimit, cudaMalloc, cudaGetDeviceProperties ## Dependencies needed to build/run [CDP](../../../README.md#cdp) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/3_CUDA_Features/cdpQuadtree/cdpQuadtree_vs2017.vcxproj b/Samples/3_CUDA_Features/cdpQuadtree/cdpQuadtree_vs2017.vcxproj index b0cd8376..15110cd3 100644 --- a/Samples/3_CUDA_Features/cdpQuadtree/cdpQuadtree_vs2017.vcxproj +++ b/Samples/3_CUDA_Features/cdpQuadtree/cdpQuadtree_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/cdpQuadtree.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -108,6 +108,6 @@ - + diff --git a/Samples/3_CUDA_Features/cdpQuadtree/cdpQuadtree_vs2019.vcxproj b/Samples/3_CUDA_Features/cdpQuadtree/cdpQuadtree_vs2019.vcxproj index 99bfdc1d..3ec1b136 100644 --- a/Samples/3_CUDA_Features/cdpQuadtree/cdpQuadtree_vs2019.vcxproj +++ b/Samples/3_CUDA_Features/cdpQuadtree/cdpQuadtree_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/cdpQuadtree.exe - 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/3_CUDA_Features/cdpQuadtree/cdpQuadtree_vs2022.vcxproj b/Samples/3_CUDA_Features/cdpQuadtree/cdpQuadtree_vs2022.vcxproj index d2e08299..181408c3 100644 --- a/Samples/3_CUDA_Features/cdpQuadtree/cdpQuadtree_vs2022.vcxproj +++ b/Samples/3_CUDA_Features/cdpQuadtree/cdpQuadtree_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/cdpQuadtree.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/3_CUDA_Features/cdpSimplePrint/Makefile b/Samples/3_CUDA_Features/cdpSimplePrint/Makefile index d64d69da..1ea64428 100644 --- a/Samples/3_CUDA_Features/cdpSimplePrint/Makefile +++ b/Samples/3_CUDA_Features/cdpSimplePrint/Makefile @@ -285,9 +285,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 61 70 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/3_CUDA_Features/cdpSimplePrint/NsightEclipse.xml b/Samples/3_CUDA_Features/cdpSimplePrint/NsightEclipse.xml index d1d5fd2f..cfe32d19 100644 --- 
a/Samples/3_CUDA_Features/cdpSimplePrint/NsightEclipse.xml +++ b/Samples/3_CUDA_Features/cdpSimplePrint/NsightEclipse.xml @@ -6,10 +6,10 @@ -dc - cudaDeviceSetLimit - cudaGetLastError cudaDeviceSynchronize + cudaGetLastError cudaGetDeviceProperties + cudaDeviceSetLimit @@ -51,6 +51,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/3_CUDA_Features/cdpSimplePrint/README.md b/Samples/3_CUDA_Features/cdpSimplePrint/README.md index ab868271..c872f4bb 100644 --- a/Samples/3_CUDA_Features/cdpSimplePrint/README.md +++ b/Samples/3_CUDA_Features/cdpSimplePrint/README.md @@ -10,7 +10,7 @@ CUDA Dynamic Parallelism ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## 
Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaDeviceSetLimit, cudaGetLastError, cudaDeviceSynchronize, cudaGetDeviceProperties +cudaDeviceSynchronize, cudaGetLastError, cudaGetDeviceProperties, cudaDeviceSetLimit ## Dependencies needed to build/run [CDP](../../../README.md#cdp) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/3_CUDA_Features/cdpSimplePrint/cdpSimplePrint_vs2017.vcxproj b/Samples/3_CUDA_Features/cdpSimplePrint/cdpSimplePrint_vs2017.vcxproj index 63502de3..b2613487 100644 --- a/Samples/3_CUDA_Features/cdpSimplePrint/cdpSimplePrint_vs2017.vcxproj +++ b/Samples/3_CUDA_Features/cdpSimplePrint/cdpSimplePrint_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/cdpSimplePrint.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -108,6 +108,6 @@ - + diff --git a/Samples/3_CUDA_Features/cdpSimplePrint/cdpSimplePrint_vs2019.vcxproj b/Samples/3_CUDA_Features/cdpSimplePrint/cdpSimplePrint_vs2019.vcxproj index d8719248..ae105c0a 100644 --- a/Samples/3_CUDA_Features/cdpSimplePrint/cdpSimplePrint_vs2019.vcxproj +++ b/Samples/3_CUDA_Features/cdpSimplePrint/cdpSimplePrint_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ 
$(OutDir)/cdpSimplePrint.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/3_CUDA_Features/cdpSimplePrint/cdpSimplePrint_vs2022.vcxproj b/Samples/3_CUDA_Features/cdpSimplePrint/cdpSimplePrint_vs2022.vcxproj index bc59636b..7f1b73f1 100644 --- a/Samples/3_CUDA_Features/cdpSimplePrint/cdpSimplePrint_vs2022.vcxproj +++ b/Samples/3_CUDA_Features/cdpSimplePrint/cdpSimplePrint_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/cdpSimplePrint.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/3_CUDA_Features/cdpSimpleQuicksort/Makefile b/Samples/3_CUDA_Features/cdpSimpleQuicksort/Makefile index 5771acad..1bf519d5 100644 --- a/Samples/3_CUDA_Features/cdpSimpleQuicksort/Makefile +++ b/Samples/3_CUDA_Features/cdpSimpleQuicksort/Makefile @@ -285,9 +285,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 61 70 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/3_CUDA_Features/cdpSimpleQuicksort/NsightEclipse.xml 
b/Samples/3_CUDA_Features/cdpSimpleQuicksort/NsightEclipse.xml index 82714619..9c196081 100644 --- a/Samples/3_CUDA_Features/cdpSimpleQuicksort/NsightEclipse.xml +++ b/Samples/3_CUDA_Features/cdpSimpleQuicksort/NsightEclipse.xml @@ -6,13 +6,13 @@ -dc - cudaDeviceSetLimit - cudaFree - cudaStreamDestroy - cudaDeviceSynchronize - cudaMalloc cudaStreamCreateWithFlags cudaMemcpy + cudaStreamDestroy + cudaFree + cudaDeviceSynchronize + cudaDeviceSetLimit + cudaMalloc cudaGetDeviceProperties @@ -55,6 +55,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/3_CUDA_Features/cdpSimpleQuicksort/README.md b/Samples/3_CUDA_Features/cdpSimpleQuicksort/README.md index 0805d380..5a765c0a 100644 --- a/Samples/3_CUDA_Features/cdpSimpleQuicksort/README.md +++ b/Samples/3_CUDA_Features/cdpSimpleQuicksort/README.md @@ -10,7 +10,7 @@ CUDA Dynamic Parallelism ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 
](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaDeviceSetLimit, cudaFree, cudaStreamDestroy, cudaDeviceSynchronize, cudaMalloc, cudaStreamCreateWithFlags, cudaMemcpy, cudaGetDeviceProperties +cudaStreamCreateWithFlags, cudaMemcpy, cudaStreamDestroy, cudaFree, cudaDeviceSynchronize, cudaDeviceSetLimit, cudaMalloc, cudaGetDeviceProperties ## Dependencies needed to build/run [CDP](../../../README.md#cdp) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/3_CUDA_Features/cdpSimpleQuicksort/cdpSimpleQuicksort_vs2017.vcxproj b/Samples/3_CUDA_Features/cdpSimpleQuicksort/cdpSimpleQuicksort_vs2017.vcxproj index 1f396df6..701ef7df 100644 --- a/Samples/3_CUDA_Features/cdpSimpleQuicksort/cdpSimpleQuicksort_vs2017.vcxproj +++ b/Samples/3_CUDA_Features/cdpSimpleQuicksort/cdpSimpleQuicksort_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/cdpSimpleQuicksort.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -108,6 +108,6 @@ - + diff --git a/Samples/3_CUDA_Features/cdpSimpleQuicksort/cdpSimpleQuicksort_vs2019.vcxproj b/Samples/3_CUDA_Features/cdpSimpleQuicksort/cdpSimpleQuicksort_vs2019.vcxproj index e926d0d1..be79af0a 100644 --- a/Samples/3_CUDA_Features/cdpSimpleQuicksort/cdpSimpleQuicksort_vs2019.vcxproj +++ b/Samples/3_CUDA_Features/cdpSimpleQuicksort/cdpSimpleQuicksort_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/cdpSimpleQuicksort.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/3_CUDA_Features/cdpSimpleQuicksort/cdpSimpleQuicksort_vs2022.vcxproj b/Samples/3_CUDA_Features/cdpSimpleQuicksort/cdpSimpleQuicksort_vs2022.vcxproj index bf24f8f3..601d665a 100644 --- 
a/Samples/3_CUDA_Features/cdpSimpleQuicksort/cdpSimpleQuicksort_vs2022.vcxproj +++ b/Samples/3_CUDA_Features/cdpSimpleQuicksort/cdpSimpleQuicksort_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/cdpSimpleQuicksort.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/3_CUDA_Features/cudaCompressibleMemory/Makefile b/Samples/3_CUDA_Features/cudaCompressibleMemory/Makefile index 7118ad8c..9d7f9adf 100644 --- a/Samples/3_CUDA_Features/cudaCompressibleMemory/Makefile +++ b/Samples/3_CUDA_Features/cudaCompressibleMemory/Makefile @@ -293,9 +293,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/3_CUDA_Features/cudaCompressibleMemory/README.md b/Samples/3_CUDA_Features/cudaCompressibleMemory/README.md index e95e4904..cd28b6f2 100644 --- a/Samples/3_CUDA_Features/cudaCompressibleMemory/README.md +++ b/Samples/3_CUDA_Features/cudaCompressibleMemory/README.md @@ -10,7 +10,7 @@ CUDA Driver API, Compressible Memory, MMAP ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 
](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64 ## CUDA APIs involved ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html) -cuMemRelease, cuCtxGetDevice, cuMemGetAllocationPropertiesFromHandle, cuMemSetAccess, cuMemMap, cuMemCreate, cuMemAddressFree, cuMemGetAllocationGranularity, cuMemUnmap, cuMemAddressReserve, cuDeviceGetAttribute +cuMemGetAllocationPropertiesFromHandle, cuMemCreate, cuDeviceGetAttribute, cuCtxGetDevice, cuMemGetAllocationGranularity, cuMemAddressFree, cuMemUnmap, cuMemMap, cuMemRelease, cuMemAddressReserve, cuMemSetAccess ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaEventRecord, cudaEventCreate, cudaOccupancyMaxPotentialBlockSize, cudaEventElapsedTime, cudaEventSynchronize, cudaMemcpy +cudaMemcpy, cudaEventSynchronize, cudaEventRecord, cudaEventElapsedTime, cudaOccupancyMaxPotentialBlockSize, cudaEventCreate ## Prerequisites -Download and install the [CUDA Toolkit 
11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/3_CUDA_Features/cudaCompressibleMemory/cudaCompressibleMemory_vs2017.vcxproj b/Samples/3_CUDA_Features/cudaCompressibleMemory/cudaCompressibleMemory_vs2017.vcxproj index cb21616c..5fd82a2d 100644 --- a/Samples/3_CUDA_Features/cudaCompressibleMemory/cudaCompressibleMemory_vs2017.vcxproj +++ b/Samples/3_CUDA_Features/cudaCompressibleMemory/cudaCompressibleMemory_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/cudaCompressibleMemory.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -108,6 +108,6 @@ - + diff --git a/Samples/3_CUDA_Features/cudaCompressibleMemory/cudaCompressibleMemory_vs2019.vcxproj b/Samples/3_CUDA_Features/cudaCompressibleMemory/cudaCompressibleMemory_vs2019.vcxproj index f4763a3e..f0138278 100644 --- a/Samples/3_CUDA_Features/cudaCompressibleMemory/cudaCompressibleMemory_vs2019.vcxproj +++ b/Samples/3_CUDA_Features/cudaCompressibleMemory/cudaCompressibleMemory_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/cudaCompressibleMemory.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler 
"/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/3_CUDA_Features/cudaCompressibleMemory/cudaCompressibleMemory_vs2022.vcxproj b/Samples/3_CUDA_Features/cudaCompressibleMemory/cudaCompressibleMemory_vs2022.vcxproj index 1a116a85..5c5b989c 100644 --- a/Samples/3_CUDA_Features/cudaCompressibleMemory/cudaCompressibleMemory_vs2022.vcxproj +++ b/Samples/3_CUDA_Features/cudaCompressibleMemory/cudaCompressibleMemory_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/cudaCompressibleMemory.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/3_CUDA_Features/cudaTensorCoreGemm/Makefile b/Samples/3_CUDA_Features/cudaTensorCoreGemm/Makefile index cd5ed03c..a949034f 100644 --- a/Samples/3_CUDA_Features/cudaTensorCoreGemm/Makefile +++ b/Samples/3_CUDA_Features/cudaTensorCoreGemm/Makefile @@ -291,9 +291,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 70 72 75 80 86 87 +SMS ?= 70 72 75 80 86 87 90 else -SMS ?= 70 75 80 86 +SMS ?= 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/3_CUDA_Features/cudaTensorCoreGemm/NsightEclipse.xml b/Samples/3_CUDA_Features/cudaTensorCoreGemm/NsightEclipse.xml index 25dc757d..e4306062 100644 --- a/Samples/3_CUDA_Features/cudaTensorCoreGemm/NsightEclipse.xml +++ b/Samples/3_CUDA_Features/cudaTensorCoreGemm/NsightEclipse.xml @@ -6,18 +6,18 @@ -maxrregcount=255 - cudaMemset - cudaFree - cudaEventRecord - cudaEventCreate - cudaFuncSetAttribute - cudaEventElapsedTime - cudaEventSynchronize - cudaMalloc - 
cudaGetLastError cudaMemcpy + cudaFree cudaGetErrorString + cudaGetLastError + cudaEventSynchronize + cudaFuncSetAttribute + cudaEventRecord + cudaMemset + cudaMalloc + cudaEventElapsedTime cudaGetDeviceProperties + cudaEventCreate sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/3_CUDA_Features/cudaTensorCoreGemm/README.md b/Samples/3_CUDA_Features/cudaTensorCoreGemm/README.md index 5c7a02ca..ed9ca03e 100644 --- a/Samples/3_CUDA_Features/cudaTensorCoreGemm/README.md +++ b/Samples/3_CUDA_Features/cudaTensorCoreGemm/README.md @@ -14,7 +14,7 @@ Matrix Multiply, WMMA, Tensor Cores ## Supported SM Architectures -[SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -27,11 +27,11 @@ x86_64, ppc64le, aarch64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemset, cudaFree, cudaEventRecord, cudaEventCreate, cudaFuncSetAttribute, cudaEventElapsedTime, cudaEventSynchronize, cudaMalloc, cudaGetLastError, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties +cudaMemcpy, cudaFree, cudaGetErrorString, cudaGetLastError, cudaEventSynchronize, cudaFuncSetAttribute, cudaEventRecord, cudaMemset, cudaMalloc, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm_vs2017.vcxproj b/Samples/3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm_vs2017.vcxproj index 6f38472d..a5854708 100644 --- a/Samples/3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm_vs2017.vcxproj +++ b/Samples/3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/cudaTensorCoreGemm.exe - compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm_vs2019.vcxproj b/Samples/3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm_vs2019.vcxproj index 32c37bb4..5a7700ba 100644 --- a/Samples/3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm_vs2019.vcxproj +++ b/Samples/3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/cudaTensorCoreGemm.exe - compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm_vs2022.vcxproj b/Samples/3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm_vs2022.vcxproj index 24801f67..3a4f102a 100644 --- a/Samples/3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm_vs2022.vcxproj +++ b/Samples/3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/cudaTensorCoreGemm.exe - 
compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/3_CUDA_Features/dmmaTensorCoreGemm/Makefile b/Samples/3_CUDA_Features/dmmaTensorCoreGemm/Makefile index e8ed96d9..a8731ad2 100644 --- a/Samples/3_CUDA_Features/dmmaTensorCoreGemm/Makefile +++ b/Samples/3_CUDA_Features/dmmaTensorCoreGemm/Makefile @@ -322,9 +322,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 80 86 87 +SMS ?= 80 86 87 90 else -SMS ?= 80 86 +SMS ?= 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/3_CUDA_Features/dmmaTensorCoreGemm/NsightEclipse.xml b/Samples/3_CUDA_Features/dmmaTensorCoreGemm/NsightEclipse.xml index 2cd65814..f28f86b3 100644 --- a/Samples/3_CUDA_Features/dmmaTensorCoreGemm/NsightEclipse.xml +++ b/Samples/3_CUDA_Features/dmmaTensorCoreGemm/NsightEclipse.xml @@ -6,18 +6,18 @@ --std=c++11 - cudaMemset - cudaFree - cudaEventRecord - cudaEventCreate - cudaFuncSetAttribute - cudaEventElapsedTime - cudaEventSynchronize - cudaMalloc - cudaGetLastError cudaMemcpy + cudaFree cudaGetErrorString + cudaGetLastError + cudaEventSynchronize + cudaFuncSetAttribute + cudaEventRecord + cudaMemset + cudaMalloc + cudaEventElapsedTime cudaGetDeviceProperties + cudaEventCreate whole @@ -52,6 +52,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/3_CUDA_Features/dmmaTensorCoreGemm/README.md b/Samples/3_CUDA_Features/dmmaTensorCoreGemm/README.md index 13b8e8e4..8699aa21 100644 --- a/Samples/3_CUDA_Features/dmmaTensorCoreGemm/README.md +++ b/Samples/3_CUDA_Features/dmmaTensorCoreGemm/README.md @@ -10,7 +10,7 @@ Matrix Multiply, WMMA, Tensor Cores ## Supported SM Architectures -[SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 8.0 
](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemset, cudaFree, cudaEventRecord, cudaEventCreate, cudaFuncSetAttribute, cudaEventElapsedTime, cudaEventSynchronize, cudaMalloc, cudaGetLastError, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties +cudaMemcpy, cudaFree, cudaGetErrorString, cudaGetLastError, cudaEventSynchronize, cudaFuncSetAttribute, cudaEventRecord, cudaMemset, cudaMalloc, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate ## Dependencies needed to build/run [CPP11](../../../README.md#cpp11) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/3_CUDA_Features/dmmaTensorCoreGemm/dmmaTensorCoreGemm_vs2017.vcxproj b/Samples/3_CUDA_Features/dmmaTensorCoreGemm/dmmaTensorCoreGemm_vs2017.vcxproj index 8e82e63e..1dbcff5e 100644 --- a/Samples/3_CUDA_Features/dmmaTensorCoreGemm/dmmaTensorCoreGemm_vs2017.vcxproj +++ b/Samples/3_CUDA_Features/dmmaTensorCoreGemm/dmmaTensorCoreGemm_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/dmmaTensorCoreGemm.exe - compute_80,sm_80;compute_86,sm_86; + compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/3_CUDA_Features/dmmaTensorCoreGemm/dmmaTensorCoreGemm_vs2019.vcxproj b/Samples/3_CUDA_Features/dmmaTensorCoreGemm/dmmaTensorCoreGemm_vs2019.vcxproj index 7c6849bd..0f024a26 100644 --- a/Samples/3_CUDA_Features/dmmaTensorCoreGemm/dmmaTensorCoreGemm_vs2019.vcxproj +++ b/Samples/3_CUDA_Features/dmmaTensorCoreGemm/dmmaTensorCoreGemm_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/dmmaTensorCoreGemm.exe - compute_80,sm_80;compute_86,sm_86; + compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/3_CUDA_Features/dmmaTensorCoreGemm/dmmaTensorCoreGemm_vs2022.vcxproj b/Samples/3_CUDA_Features/dmmaTensorCoreGemm/dmmaTensorCoreGemm_vs2022.vcxproj index 480cc0b8..6dcc0232 100644 --- a/Samples/3_CUDA_Features/dmmaTensorCoreGemm/dmmaTensorCoreGemm_vs2022.vcxproj +++ b/Samples/3_CUDA_Features/dmmaTensorCoreGemm/dmmaTensorCoreGemm_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/dmmaTensorCoreGemm.exe - compute_80,sm_80;compute_86,sm_86; + compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/3_CUDA_Features/globalToShmemAsyncCopy/Makefile b/Samples/3_CUDA_Features/globalToShmemAsyncCopy/Makefile index 
7a7fd2fa..6fdd9aab 100644 --- a/Samples/3_CUDA_Features/globalToShmemAsyncCopy/Makefile +++ b/Samples/3_CUDA_Features/globalToShmemAsyncCopy/Makefile @@ -310,9 +310,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 70 72 75 80 86 87 +SMS ?= 70 72 75 80 86 87 90 else -SMS ?= 70 75 80 86 +SMS ?= 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/3_CUDA_Features/globalToShmemAsyncCopy/NsightEclipse.xml b/Samples/3_CUDA_Features/globalToShmemAsyncCopy/NsightEclipse.xml index 60eb92c8..83bc6d5c 100644 --- a/Samples/3_CUDA_Features/globalToShmemAsyncCopy/NsightEclipse.xml +++ b/Samples/3_CUDA_Features/globalToShmemAsyncCopy/NsightEclipse.xml @@ -6,20 +6,20 @@ --std=c++11 - cudaFree - cudaEventRecord - cudaMallocHost - cudaEventCreate - cudaMemsetAsync - cudaEventElapsedTime - cudaEventSynchronize - cudaDeviceGetAttribute - cudaFreeHost - cudaMalloc cudaStreamCreateWithFlags - cudaEventDestroy + cudaMalloc + cudaDeviceGetAttribute + cudaFree + cudaMallocHost + cudaEventSynchronize + cudaEventRecord + cudaFreeHost cudaStreamSynchronize + cudaEventDestroy + cudaEventElapsedTime + cudaMemsetAsync cudaMemcpyAsync + cudaEventCreate whole @@ -59,6 +59,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/3_CUDA_Features/globalToShmemAsyncCopy/README.md b/Samples/3_CUDA_Features/globalToShmemAsyncCopy/README.md index b5adb76f..a2d7d6a3 100644 --- a/Samples/3_CUDA_Features/globalToShmemAsyncCopy/README.md +++ b/Samples/3_CUDA_Features/globalToShmemAsyncCopy/README.md @@ -10,7 +10,7 @@ CUDA Runtime API, Linear Algebra, CPP11 CUDA ## Supported SM Architectures -[SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 
](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l, aarch64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaEventRecord, cudaMallocHost, cudaEventCreate, cudaMemsetAsync, cudaEventElapsedTime, cudaEventSynchronize, cudaDeviceGetAttribute, cudaFreeHost, cudaMalloc, cudaStreamCreateWithFlags, cudaEventDestroy, cudaStreamSynchronize, cudaMemcpyAsync +cudaStreamCreateWithFlags, cudaMalloc, cudaDeviceGetAttribute, cudaFree, cudaMallocHost, cudaEventSynchronize, cudaEventRecord, cudaFreeHost, cudaStreamSynchronize, cudaEventDestroy, cudaEventElapsedTime, cudaMemsetAsync, cudaMemcpyAsync, cudaEventCreate ## Dependencies needed to build/run [CPP11](../../../README.md#cpp11) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/3_CUDA_Features/globalToShmemAsyncCopy/globalToShmemAsyncCopy_vs2017.vcxproj b/Samples/3_CUDA_Features/globalToShmemAsyncCopy/globalToShmemAsyncCopy_vs2017.vcxproj index 100834d7..41bedad1 100644 --- a/Samples/3_CUDA_Features/globalToShmemAsyncCopy/globalToShmemAsyncCopy_vs2017.vcxproj +++ b/Samples/3_CUDA_Features/globalToShmemAsyncCopy/globalToShmemAsyncCopy_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/globalToShmemAsyncCopy.exe - compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/3_CUDA_Features/globalToShmemAsyncCopy/globalToShmemAsyncCopy_vs2019.vcxproj b/Samples/3_CUDA_Features/globalToShmemAsyncCopy/globalToShmemAsyncCopy_vs2019.vcxproj index 5f1c0721..7db3f231 100644 --- a/Samples/3_CUDA_Features/globalToShmemAsyncCopy/globalToShmemAsyncCopy_vs2019.vcxproj +++ b/Samples/3_CUDA_Features/globalToShmemAsyncCopy/globalToShmemAsyncCopy_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/globalToShmemAsyncCopy.exe - compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/3_CUDA_Features/globalToShmemAsyncCopy/globalToShmemAsyncCopy_vs2022.vcxproj b/Samples/3_CUDA_Features/globalToShmemAsyncCopy/globalToShmemAsyncCopy_vs2022.vcxproj index 2b82306f..5351130d 100644 --- a/Samples/3_CUDA_Features/globalToShmemAsyncCopy/globalToShmemAsyncCopy_vs2022.vcxproj +++ b/Samples/3_CUDA_Features/globalToShmemAsyncCopy/globalToShmemAsyncCopy_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/globalToShmemAsyncCopy.exe - compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; 
+ compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/3_CUDA_Features/graphMemoryFootprint/Makefile b/Samples/3_CUDA_Features/graphMemoryFootprint/Makefile index f4647da1..0233718b 100644 --- a/Samples/3_CUDA_Features/graphMemoryFootprint/Makefile +++ b/Samples/3_CUDA_Features/graphMemoryFootprint/Makefile @@ -285,9 +285,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/3_CUDA_Features/graphMemoryFootprint/NsightEclipse.xml b/Samples/3_CUDA_Features/graphMemoryFootprint/NsightEclipse.xml index 72be4775..7f7d842b 100644 --- a/Samples/3_CUDA_Features/graphMemoryFootprint/NsightEclipse.xml +++ b/Samples/3_CUDA_Features/graphMemoryFootprint/NsightEclipse.xml @@ -3,23 +3,23 @@ graphMemoryFootprint - cudaGraphAddMemFreeNode - cudaStreamDestroy - cudaFree - cudaGraphExecDestroy - cudaGraphInstantiate - cudaDeviceGetAttribute - cudaDriverGetVersion - cudaGraphCreate - cudaGraphAddKernelNode cudaGraphAddMemAllocNode cudaStreamCreateWithFlags - cudaDeviceGraphMemTrim - cudaStreamSynchronize + cudaGraphInstantiate + cudaStreamDestroy + cudaFree + cudaDeviceGetAttribute + cudaGraphAddKernelNode + cudaGraphAddMemFreeNode cudaDeviceGetGraphMemAttribute + cudaGraphCreate cudaGraphDestroy - cudaGetDeviceProperties + cudaDriverGetVersion cudaGraphLaunch + cudaStreamSynchronize + cudaDeviceGraphMemTrim + cudaGetDeviceProperties + cudaGraphExecDestroy whole @@ -56,6 +56,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/3_CUDA_Features/graphMemoryFootprint/README.md b/Samples/3_CUDA_Features/graphMemoryFootprint/README.md index 76c04cf7..6286fa0d 100644 --- 
a/Samples/3_CUDA_Features/graphMemoryFootprint/README.md +++ b/Samples/3_CUDA_Features/graphMemoryFootprint/README.md @@ -10,7 +10,7 @@ CUDA Runtime API, Performance Strategies, CUDA Graphs ## Supported SM Architectures -[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaGraphAddMemFreeNode, cudaStreamDestroy, cudaFree, cudaGraphExecDestroy, cudaGraphInstantiate, cudaDeviceGetAttribute, cudaDriverGetVersion, cudaGraphCreate, cudaGraphAddKernelNode, cudaGraphAddMemAllocNode, cudaStreamCreateWithFlags, cudaDeviceGraphMemTrim, cudaStreamSynchronize, cudaDeviceGetGraphMemAttribute, cudaGraphDestroy, cudaGetDeviceProperties, cudaGraphLaunch +cudaGraphAddMemAllocNode, cudaStreamCreateWithFlags, cudaGraphInstantiate, cudaStreamDestroy, cudaFree, cudaDeviceGetAttribute, cudaGraphAddKernelNode, cudaGraphAddMemFreeNode, cudaDeviceGetGraphMemAttribute, cudaGraphCreate, cudaGraphDestroy, cudaDriverGetVersion, cudaGraphLaunch, cudaStreamSynchronize, cudaDeviceGraphMemTrim, cudaGetDeviceProperties, cudaGraphExecDestroy ## 
Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/3_CUDA_Features/graphMemoryFootprint/graphMemoryFootprint_vs2017.vcxproj b/Samples/3_CUDA_Features/graphMemoryFootprint/graphMemoryFootprint_vs2017.vcxproj index 412492cb..e65d0b66 100644 --- a/Samples/3_CUDA_Features/graphMemoryFootprint/graphMemoryFootprint_vs2017.vcxproj +++ b/Samples/3_CUDA_Features/graphMemoryFootprint/graphMemoryFootprint_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/graphMemoryFootprint.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/3_CUDA_Features/graphMemoryFootprint/graphMemoryFootprint_vs2019.vcxproj b/Samples/3_CUDA_Features/graphMemoryFootprint/graphMemoryFootprint_vs2019.vcxproj index 28c07316..82b98142 100644 --- a/Samples/3_CUDA_Features/graphMemoryFootprint/graphMemoryFootprint_vs2019.vcxproj +++ b/Samples/3_CUDA_Features/graphMemoryFootprint/graphMemoryFootprint_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/graphMemoryFootprint.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/3_CUDA_Features/graphMemoryFootprint/graphMemoryFootprint_vs2022.vcxproj b/Samples/3_CUDA_Features/graphMemoryFootprint/graphMemoryFootprint_vs2022.vcxproj index 5eaef34b..cbf3dd31 100644 --- a/Samples/3_CUDA_Features/graphMemoryFootprint/graphMemoryFootprint_vs2022.vcxproj +++ b/Samples/3_CUDA_Features/graphMemoryFootprint/graphMemoryFootprint_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/graphMemoryFootprint.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/3_CUDA_Features/graphMemoryNodes/Makefile b/Samples/3_CUDA_Features/graphMemoryNodes/Makefile index a233d14b..b760fc44 100644 --- a/Samples/3_CUDA_Features/graphMemoryNodes/Makefile +++ b/Samples/3_CUDA_Features/graphMemoryNodes/Makefile @@ -285,9 +285,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/3_CUDA_Features/graphMemoryNodes/NsightEclipse.xml b/Samples/3_CUDA_Features/graphMemoryNodes/NsightEclipse.xml index 5fd9f688..a2cc3608 100644 --- a/Samples/3_CUDA_Features/graphMemoryNodes/NsightEclipse.xml +++ 
b/Samples/3_CUDA_Features/graphMemoryNodes/NsightEclipse.xml @@ -3,32 +3,32 @@ graphMemoryNodes - cudaMallocAsync - cudaStreamCreateWithFlags cudaMemcpy - cudaMemcpyAsync - cudaStreamDestroy - cudaMallocManaged - cudaEventCreate - cudaDriverGetVersion - cudaGraphCreate - cudaGraphAddMemAllocNode - cudaMalloc - cudaEventDestroy - cudaStreamEndCapture - cudaGraphExecDestroy - cudaStreamBeginCapture cudaDeviceGetAttribute - cudaStreamSynchronize - cudaGraphDestroy + cudaDriverGetVersion cudaGraphLaunch - cudaGraphAddMemFreeNode - cudaStreamWaitEvent - cudaFree - cudaEventRecord - cudaGraphInstantiate - cudaGraphAddKernelNode + cudaEventDestroy + cudaMallocAsync + cudaStreamEndCapture + cudaMallocManaged + cudaGraphCreate + cudaMemcpyAsync cudaFreeAsync + cudaStreamCreateWithFlags + cudaGraphInstantiate + cudaStreamDestroy + cudaStreamBeginCapture + cudaStreamWaitEvent + cudaEventCreate + cudaGraphAddMemAllocNode + cudaFree + cudaGraphAddKernelNode + cudaGraphAddMemFreeNode + cudaGraphDestroy + cudaEventRecord + cudaStreamSynchronize + cudaMalloc + cudaGraphExecDestroy whole @@ -67,6 +67,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/3_CUDA_Features/graphMemoryNodes/README.md b/Samples/3_CUDA_Features/graphMemoryNodes/README.md index f3e934e1..7bf467a4 100644 --- a/Samples/3_CUDA_Features/graphMemoryNodes/README.md +++ b/Samples/3_CUDA_Features/graphMemoryNodes/README.md @@ -10,7 +10,7 @@ CUDA Graphs, Stream Capture ## Supported SM Architectures -[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 
](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMallocAsync, cudaStreamCreateWithFlags, cudaMemcpy, cudaMemcpyAsync, cudaStreamDestroy, cudaMallocManaged, cudaEventCreate, cudaDriverGetVersion, cudaGraphCreate, cudaGraphAddMemAllocNode, cudaMalloc, cudaEventDestroy, cudaStreamEndCapture, cudaGraphExecDestroy, cudaStreamBeginCapture, cudaDeviceGetAttribute, cudaStreamSynchronize, cudaGraphDestroy, cudaGraphLaunch, cudaGraphAddMemFreeNode, cudaStreamWaitEvent, cudaFree, cudaEventRecord, cudaGraphInstantiate, cudaGraphAddKernelNode, cudaFreeAsync +cudaMemcpy, cudaDeviceGetAttribute, cudaDriverGetVersion, cudaGraphLaunch, cudaEventDestroy, cudaMallocAsync, cudaStreamEndCapture, cudaMallocManaged, cudaGraphCreate, cudaMemcpyAsync, cudaFreeAsync, cudaStreamCreateWithFlags, cudaGraphInstantiate, cudaStreamDestroy, cudaStreamBeginCapture, cudaStreamWaitEvent, cudaEventCreate, cudaGraphAddMemAllocNode, cudaFree, cudaGraphAddKernelNode, cudaGraphAddMemFreeNode, cudaGraphDestroy, cudaEventRecord, cudaStreamSynchronize, cudaMalloc, cudaGraphExecDestroy ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/3_CUDA_Features/graphMemoryNodes/graphMemoryNodes_vs2017.vcxproj b/Samples/3_CUDA_Features/graphMemoryNodes/graphMemoryNodes_vs2017.vcxproj index 54629b89..f025d778 100644 --- a/Samples/3_CUDA_Features/graphMemoryNodes/graphMemoryNodes_vs2017.vcxproj +++ b/Samples/3_CUDA_Features/graphMemoryNodes/graphMemoryNodes_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/graphMemoryNodes.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/3_CUDA_Features/graphMemoryNodes/graphMemoryNodes_vs2019.vcxproj b/Samples/3_CUDA_Features/graphMemoryNodes/graphMemoryNodes_vs2019.vcxproj index ce007363..df298580 100644 --- a/Samples/3_CUDA_Features/graphMemoryNodes/graphMemoryNodes_vs2019.vcxproj +++ b/Samples/3_CUDA_Features/graphMemoryNodes/graphMemoryNodes_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/graphMemoryNodes.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/3_CUDA_Features/graphMemoryNodes/graphMemoryNodes_vs2022.vcxproj b/Samples/3_CUDA_Features/graphMemoryNodes/graphMemoryNodes_vs2022.vcxproj index 70d59c50..5f123dc9 100644 --- 
a/Samples/3_CUDA_Features/graphMemoryNodes/graphMemoryNodes_vs2022.vcxproj +++ b/Samples/3_CUDA_Features/graphMemoryNodes/graphMemoryNodes_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/graphMemoryNodes.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/3_CUDA_Features/immaTensorCoreGemm/Makefile b/Samples/3_CUDA_Features/immaTensorCoreGemm/Makefile index 0018823d..840e9399 100644 --- a/Samples/3_CUDA_Features/immaTensorCoreGemm/Makefile +++ b/Samples/3_CUDA_Features/immaTensorCoreGemm/Makefile @@ -291,9 +291,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 72 75 80 86 87 +SMS ?= 72 75 80 86 87 90 else -SMS ?= 75 80 86 +SMS ?= 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/3_CUDA_Features/immaTensorCoreGemm/NsightEclipse.xml b/Samples/3_CUDA_Features/immaTensorCoreGemm/NsightEclipse.xml index ce92f2df..35a48fe6 100644 --- a/Samples/3_CUDA_Features/immaTensorCoreGemm/NsightEclipse.xml +++ b/Samples/3_CUDA_Features/immaTensorCoreGemm/NsightEclipse.xml @@ -6,18 +6,18 @@ -maxrregcount=255 - cudaMemset - cudaFree - cudaEventRecord - cudaEventCreate - cudaFuncSetAttribute - cudaEventElapsedTime - cudaEventSynchronize - cudaMalloc - cudaGetLastError cudaMemcpy + cudaFree cudaGetErrorString + cudaGetLastError + cudaEventSynchronize + cudaFuncSetAttribute + cudaEventRecord + cudaMemset + cudaMalloc + cudaEventElapsedTime cudaGetDeviceProperties + cudaEventCreate whole @@ -47,6 +47,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git 
a/Samples/3_CUDA_Features/immaTensorCoreGemm/README.md b/Samples/3_CUDA_Features/immaTensorCoreGemm/README.md index 61b40710..db9d4802 100644 --- a/Samples/3_CUDA_Features/immaTensorCoreGemm/README.md +++ b/Samples/3_CUDA_Features/immaTensorCoreGemm/README.md @@ -10,7 +10,7 @@ Matrix Multiply, WMMA, Tensor Cores ## Supported SM Architectures -[SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, aarch64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemset, cudaFree, cudaEventRecord, cudaEventCreate, cudaFuncSetAttribute, cudaEventElapsedTime, cudaEventSynchronize, cudaMalloc, cudaGetLastError, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties +cudaMemcpy, cudaFree, cudaGetErrorString, cudaGetLastError, cudaEventSynchronize, cudaFuncSetAttribute, cudaEventRecord, cudaMemset, cudaMalloc, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/3_CUDA_Features/immaTensorCoreGemm/immaTensorCoreGemm_vs2017.vcxproj b/Samples/3_CUDA_Features/immaTensorCoreGemm/immaTensorCoreGemm_vs2017.vcxproj index 79b1a319..b48ad38f 100644 --- a/Samples/3_CUDA_Features/immaTensorCoreGemm/immaTensorCoreGemm_vs2017.vcxproj +++ b/Samples/3_CUDA_Features/immaTensorCoreGemm/immaTensorCoreGemm_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/immaTensorCoreGemm.exe - compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/3_CUDA_Features/immaTensorCoreGemm/immaTensorCoreGemm_vs2019.vcxproj b/Samples/3_CUDA_Features/immaTensorCoreGemm/immaTensorCoreGemm_vs2019.vcxproj index 9351844d..b5931f57 100644 --- a/Samples/3_CUDA_Features/immaTensorCoreGemm/immaTensorCoreGemm_vs2019.vcxproj +++ b/Samples/3_CUDA_Features/immaTensorCoreGemm/immaTensorCoreGemm_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/immaTensorCoreGemm.exe - compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/3_CUDA_Features/immaTensorCoreGemm/immaTensorCoreGemm_vs2022.vcxproj b/Samples/3_CUDA_Features/immaTensorCoreGemm/immaTensorCoreGemm_vs2022.vcxproj index 17495837..90ae3390 100644 --- a/Samples/3_CUDA_Features/immaTensorCoreGemm/immaTensorCoreGemm_vs2022.vcxproj +++ b/Samples/3_CUDA_Features/immaTensorCoreGemm/immaTensorCoreGemm_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/immaTensorCoreGemm.exe - compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git 
a/Samples/3_CUDA_Features/jacobiCudaGraphs/Makefile b/Samples/3_CUDA_Features/jacobiCudaGraphs/Makefile index 35fadaa5..fa42ed87 100644 --- a/Samples/3_CUDA_Features/jacobiCudaGraphs/Makefile +++ b/Samples/3_CUDA_Features/jacobiCudaGraphs/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/3_CUDA_Features/jacobiCudaGraphs/NsightEclipse.xml b/Samples/3_CUDA_Features/jacobiCudaGraphs/NsightEclipse.xml index a752218c..e1572b43 100644 --- a/Samples/3_CUDA_Features/jacobiCudaGraphs/NsightEclipse.xml +++ b/Samples/3_CUDA_Features/jacobiCudaGraphs/NsightEclipse.xml @@ -3,28 +3,28 @@ jacobiCudaGraphs - cudaGraphAddMemsetNode - cudaStreamCreateWithFlags - cudaMemcpyAsync + cudaExtent + cudaGraphLaunch + cudaGraphAddMemcpyNode cudaMallocHost cudaPitchedPtr - cudaGraphCreate - cudaMalloc - cudaPos - cudaGraphAddMemcpyNode cudaStreamEndCapture - cudaGraphExecDestroy - cudaStreamBeginCapture - cudaGraphExecKernelNodeSetParams - cudaStreamSynchronize - cudaGraphLaunch - cudaFree - cudaGraphInstantiate - cudaExtent - cudaMemsetAsync + cudaGraphCreate cudaFreeHost - cudaGraphAddKernelNode + cudaMemsetAsync + cudaMemcpyAsync + cudaGraphExecKernelNodeSetParams + cudaStreamCreateWithFlags + cudaGraphInstantiate + cudaStreamBeginCapture + cudaFree cudaGraphExecUpdate + cudaGraphAddKernelNode + cudaPos + cudaStreamSynchronize + cudaGraphAddMemsetNode + cudaMalloc + cudaGraphExecDestroy whole @@ -66,6 +66,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/3_CUDA_Features/jacobiCudaGraphs/README.md b/Samples/3_CUDA_Features/jacobiCudaGraphs/README.md index b9d76e69..68722187 100644 --- a/Samples/3_CUDA_Features/jacobiCudaGraphs/README.md +++ b/Samples/3_CUDA_Features/jacobiCudaGraphs/README.md @@ -10,7 +10,7 @@ CUDA 
Graphs, Stream Capture, Instantiated CUDA Graph Update, Cooperative Groups ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaGraphAddMemsetNode, cudaStreamCreateWithFlags, cudaMemcpyAsync, cudaMallocHost, cudaPitchedPtr, cudaGraphCreate, cudaMalloc, cudaPos, cudaGraphAddMemcpyNode, cudaStreamEndCapture, cudaGraphExecDestroy, cudaStreamBeginCapture, cudaGraphExecKernelNodeSetParams, cudaStreamSynchronize, cudaGraphLaunch, cudaFree, cudaGraphInstantiate, cudaExtent, cudaMemsetAsync, 
cudaFreeHost, cudaGraphAddKernelNode, cudaGraphExecUpdate +cudaExtent, cudaGraphLaunch, cudaGraphAddMemcpyNode, cudaMallocHost, cudaPitchedPtr, cudaStreamEndCapture, cudaGraphCreate, cudaFreeHost, cudaMemsetAsync, cudaMemcpyAsync, cudaGraphExecKernelNodeSetParams, cudaStreamCreateWithFlags, cudaGraphInstantiate, cudaStreamBeginCapture, cudaFree, cudaGraphExecUpdate, cudaGraphAddKernelNode, cudaPos, cudaStreamSynchronize, cudaGraphAddMemsetNode, cudaMalloc, cudaGraphExecDestroy ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/3_CUDA_Features/jacobiCudaGraphs/jacobiCudaGraphs_vs2017.vcxproj b/Samples/3_CUDA_Features/jacobiCudaGraphs/jacobiCudaGraphs_vs2017.vcxproj index e8a5153b..489735bc 100644 --- a/Samples/3_CUDA_Features/jacobiCudaGraphs/jacobiCudaGraphs_vs2017.vcxproj +++ b/Samples/3_CUDA_Features/jacobiCudaGraphs/jacobiCudaGraphs_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/jacobiCudaGraphs.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -108,6 +108,6 @@ - + diff --git a/Samples/3_CUDA_Features/jacobiCudaGraphs/jacobiCudaGraphs_vs2019.vcxproj b/Samples/3_CUDA_Features/jacobiCudaGraphs/jacobiCudaGraphs_vs2019.vcxproj index be846454..b6440eb1 100644 --- a/Samples/3_CUDA_Features/jacobiCudaGraphs/jacobiCudaGraphs_vs2019.vcxproj +++ b/Samples/3_CUDA_Features/jacobiCudaGraphs/jacobiCudaGraphs_vs2019.vcxproj @@ -34,7 +34,7 
@@ - + @@ -63,7 +63,7 @@ $(OutDir)/jacobiCudaGraphs.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/3_CUDA_Features/jacobiCudaGraphs/jacobiCudaGraphs_vs2022.vcxproj b/Samples/3_CUDA_Features/jacobiCudaGraphs/jacobiCudaGraphs_vs2022.vcxproj index e7b1ea43..2d37b087 100644 --- a/Samples/3_CUDA_Features/jacobiCudaGraphs/jacobiCudaGraphs_vs2022.vcxproj +++ b/Samples/3_CUDA_Features/jacobiCudaGraphs/jacobiCudaGraphs_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/jacobiCudaGraphs.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/3_CUDA_Features/memMapIPCDrv/Makefile b/Samples/3_CUDA_Features/memMapIPCDrv/Makefile index fd7d6ba7..ae7b17d1 100644 --- a/Samples/3_CUDA_Features/memMapIPCDrv/Makefile +++ b/Samples/3_CUDA_Features/memMapIPCDrv/Makefile @@ -274,24 +274,6 @@ ifeq ($(TARGET_OS),darwin) SAMPLE_ENABLED := 0 endif -# This sample is not supported on ARMv7 -ifeq ($(TARGET_ARCH),armv7l) - $(info >>> WARNING - memMapIPCDrv is not supported on ARMv7 - waiving sample <<<) - SAMPLE_ENABLED := 0 -endif - -# This sample is not supported on aarch64 -ifeq ($(TARGET_ARCH),aarch64) - $(info >>> WARNING - memMapIPCDrv is not supported 
on aarch64 - waiving sample <<<) - SAMPLE_ENABLED := 0 -endif - -# This sample is not supported on sbsa -ifeq ($(TARGET_ARCH),sbsa) - $(info >>> WARNING - memMapIPCDrv is not supported on sbsa - waiving sample <<<) - SAMPLE_ENABLED := 0 -endif - ALL_LDFLAGS := ALL_LDFLAGS += $(ALL_CCFLAGS) ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS)) diff --git a/Samples/3_CUDA_Features/memMapIPCDrv/README.md b/Samples/3_CUDA_Features/memMapIPCDrv/README.md index 435af2ed..bace5c4f 100644 --- a/Samples/3_CUDA_Features/memMapIPCDrv/README.md +++ b/Samples/3_CUDA_Features/memMapIPCDrv/README.md @@ -10,27 +10,27 @@ CUDA Driver API, cuMemMap IPC, MMAP ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 
](https://developer.nvidia.com/cuda-gpus) ## Supported OSes -Linux, Windows +Linux, Windows, QNX ## Supported CPU Architecture -x86_64, ppc64le +x86_64, ppc64le, armv7l, aarch64 ## CUDA APIs involved ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html) -cuCtxSetCurrent, cuMemSetAccess, cuMemcpyDtoHAsync, cuStreamDestroy, cuInit, cuMemAddressReserve, cuCtxDestroy, cuModuleGetFunction, cuModuleLoad, cuStreamCreate, cuCtxCreate, cuMemExportToShareableHandle, cuMemAddressFree, cuMemGetAllocationGranularity, cuModuleLoadDataEx, cuDeviceGet, cuMemUnmap, cuDeviceGetAttribute, cuMemRelease, cuCtxEnablePeerAccess, cuMemMap, cuMemImportFromShareableHandle, cuMemCreate, cuStreamSynchronize, cuDeviceCanAccessPeer, cuDeviceGetCount, cuLaunchKernel, cuOccupancyMaxActiveBlocksPerMultiprocessor +cuDeviceCanAccessPeer, cuMemImportFromShareableHandle, cuModuleLoadDataEx, cuModuleGetFunction, cuMemSetAccess, cuModuleLoad, cuStreamCreate, cuMemRelease, cuInit, cuLaunchKernel, cuMemcpyDtoHAsync, cuMemCreate, cuDeviceGet, cuCtxDestroy, cuDeviceGetCount, cuMemMap, cuMemExportToShareableHandle, cuStreamSynchronize, cuCtxEnablePeerAccess, cuDeviceGetAttribute, cuOccupancyMaxActiveBlocksPerMultiprocessor, cuCtxSetCurrent, cuMemGetAllocationGranularity, cuMemAddressFree, cuMemUnmap, cuCtxCreate, cuStreamDestroy, cuMemAddressReserve ## Dependencies needed to build/run [IPC](../../../README.md#ipc) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run @@ -52,9 +52,9 @@ $ cd $ make ``` The samples makefiles can take advantage of certain options: -* **TARGET_ARCH=** - cross-compile targeting a specific architecture. Allowed architectures are x86_64, ppc64le. 
+* **TARGET_ARCH=** - cross-compile targeting a specific architecture. Allowed architectures are x86_64, ppc64le, armv7l, aarch64. By default, TARGET_ARCH is set to HOST_ARCH. On an x86_64 machine, not setting TARGET_ARCH is the equivalent of setting TARGET_ARCH=x86_64.
-`$ make TARGET_ARCH=x86_64`
`$ make TARGET_ARCH=ppc64le`
+`$ make TARGET_ARCH=x86_64`
`$ make TARGET_ARCH=ppc64le`
`$ make TARGET_ARCH=armv7l`
`$ make TARGET_ARCH=aarch64`
See [here](http://docs.nvidia.com/cuda/cuda-samples/index.html#cross-samples) for more details. * **dbg=1** - build with debug symbols ``` diff --git a/Samples/3_CUDA_Features/memMapIPCDrv/memMapIPCDrv_vs2017.vcxproj b/Samples/3_CUDA_Features/memMapIPCDrv/memMapIPCDrv_vs2017.vcxproj index 02b0d7ea..4d7d058a 100644 --- a/Samples/3_CUDA_Features/memMapIPCDrv/memMapIPCDrv_vs2017.vcxproj +++ b/Samples/3_CUDA_Features/memMapIPCDrv/memMapIPCDrv_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -112,6 +112,6 @@ - + diff --git a/Samples/3_CUDA_Features/memMapIPCDrv/memMapIPCDrv_vs2019.vcxproj b/Samples/3_CUDA_Features/memMapIPCDrv/memMapIPCDrv_vs2019.vcxproj index 3abc66c6..287fbc92 100644 --- a/Samples/3_CUDA_Features/memMapIPCDrv/memMapIPCDrv_vs2019.vcxproj +++ b/Samples/3_CUDA_Features/memMapIPCDrv/memMapIPCDrv_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -108,6 +108,6 @@ - + diff --git a/Samples/3_CUDA_Features/memMapIPCDrv/memMapIPCDrv_vs2022.vcxproj b/Samples/3_CUDA_Features/memMapIPCDrv/memMapIPCDrv_vs2022.vcxproj index 8da6a6cd..d6bc39c6 100644 --- a/Samples/3_CUDA_Features/memMapIPCDrv/memMapIPCDrv_vs2022.vcxproj +++ b/Samples/3_CUDA_Features/memMapIPCDrv/memMapIPCDrv_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -108,6 +108,6 @@ - + diff --git a/Samples/3_CUDA_Features/memMapIPCDrv/memMapIpc.cpp b/Samples/3_CUDA_Features/memMapIPCDrv/memMapIpc.cpp index ba275b64..19d6aa60 100644 --- a/Samples/3_CUDA_Features/memMapIPCDrv/memMapIpc.cpp +++ b/Samples/3_CUDA_Features/memMapIPCDrv/memMapIpc.cpp @@ -595,10 +595,6 @@ static void parentProcess(char *app) { // Host code int main(int argc, char **argv) { -#if defined(__arm__) || defined(__aarch64__) - printf("Not supported on ARM\n"); - return EXIT_WAIVED; -#else // Initialize checkCudaErrors(cuInit(0)); @@ -608,7 +604,6 @@ int main(int argc, char **argv) { childProcess(atoi(argv[1]), atoi(argv[2]), argv); } return EXIT_SUCCESS; -#endif } bool inline findModulePath(const char *module_file, string &module_path, @@ -643,4 +638,4 @@ bool inline 
findModulePath(const char *module_file, string &module_path, return true; } -} \ No newline at end of file +} diff --git a/Samples/3_CUDA_Features/newdelete/Makefile b/Samples/3_CUDA_Features/newdelete/Makefile index 9e4ba3b6..48c352f3 100644 --- a/Samples/3_CUDA_Features/newdelete/Makefile +++ b/Samples/3_CUDA_Features/newdelete/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/3_CUDA_Features/newdelete/NsightEclipse.xml b/Samples/3_CUDA_Features/newdelete/NsightEclipse.xml index edcd5270..ae7639e4 100644 --- a/Samples/3_CUDA_Features/newdelete/NsightEclipse.xml +++ b/Samples/3_CUDA_Features/newdelete/NsightEclipse.xml @@ -3,11 +3,11 @@ newdelete - cudaDeviceSetLimit + cudaMemcpy cudaFree cudaDeviceSynchronize + cudaDeviceSetLimit cudaMalloc - cudaMemcpy whole @@ -46,6 +46,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/3_CUDA_Features/newdelete/README.md b/Samples/3_CUDA_Features/newdelete/README.md index a976004d..1df54ae2 100644 --- a/Samples/3_CUDA_Features/newdelete/README.md +++ b/Samples/3_CUDA_Features/newdelete/README.md @@ -10,7 +10,7 @@ Device Memory Allocation, C++ Templates ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 
](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaDeviceSetLimit, cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaMemcpy +cudaMemcpy, cudaFree, cudaDeviceSynchronize, cudaDeviceSetLimit, cudaMalloc ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/3_CUDA_Features/newdelete/newdelete_vs2017.vcxproj b/Samples/3_CUDA_Features/newdelete/newdelete_vs2017.vcxproj index 04f1a3b2..f5546f27 100644 --- a/Samples/3_CUDA_Features/newdelete/newdelete_vs2017.vcxproj +++ b/Samples/3_CUDA_Features/newdelete/newdelete_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/newdelete.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/3_CUDA_Features/newdelete/newdelete_vs2019.vcxproj b/Samples/3_CUDA_Features/newdelete/newdelete_vs2019.vcxproj index 75e28c88..4f6a09f4 100644 --- a/Samples/3_CUDA_Features/newdelete/newdelete_vs2019.vcxproj +++ b/Samples/3_CUDA_Features/newdelete/newdelete_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/newdelete.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/3_CUDA_Features/newdelete/newdelete_vs2022.vcxproj b/Samples/3_CUDA_Features/newdelete/newdelete_vs2022.vcxproj index 20c428e5..cec331d8 100644 --- a/Samples/3_CUDA_Features/newdelete/newdelete_vs2022.vcxproj +++ b/Samples/3_CUDA_Features/newdelete/newdelete_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/newdelete.exe - 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/3_CUDA_Features/ptxjit/NsightEclipse.xml b/Samples/3_CUDA_Features/ptxjit/NsightEclipse.xml index a5dc2e54..2beac119 100644 --- a/Samples/3_CUDA_Features/ptxjit/NsightEclipse.xml +++ b/Samples/3_CUDA_Features/ptxjit/NsightEclipse.xml @@ -3,18 +3,18 @@ ptxjit + cuLaunchKernel + cuModuleLoadData + cuLinkCreate cuModuleGetFunction cuLinkAddData - cuModuleLoadData - cuLaunchKernel cuModuleUnload - cuLinkComplete - cuLinkCreate cuLinkDestroy - cudaDriverGetVersion - cudaFree + cuLinkComplete cudaMalloc + cudaDriverGetVersion cudaMemcpy + cudaFree separate @@ -57,6 +57,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/3_CUDA_Features/ptxjit/README.md b/Samples/3_CUDA_Features/ptxjit/README.md index 021b4b40..314c6284 100644 --- a/Samples/3_CUDA_Features/ptxjit/README.md +++ b/Samples/3_CUDA_Features/ptxjit/README.md @@ -10,7 +10,7 @@ CUDA Driver API ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 
](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html) -cuModuleGetFunction, cuLinkAddData, cuModuleLoadData, cuLaunchKernel, cuModuleUnload, cuLinkComplete, cuLinkCreate, cuLinkDestroy +cuLaunchKernel, cuModuleLoadData, cuLinkCreate, cuModuleGetFunction, cuLinkAddData, cuModuleUnload, cuLinkDestroy, cuLinkComplete ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaDriverGetVersion, cudaFree, cudaMalloc, cudaMemcpy +cudaMalloc, cudaDriverGetVersion, cudaMemcpy, cudaFree ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/3_CUDA_Features/ptxjit/ptxjit_vs2017.vcxproj b/Samples/3_CUDA_Features/ptxjit/ptxjit_vs2017.vcxproj index 6c7fa953..8544a38c 100644 --- a/Samples/3_CUDA_Features/ptxjit/ptxjit_vs2017.vcxproj +++ b/Samples/3_CUDA_Features/ptxjit/ptxjit_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -111,6 +111,6 @@ - + diff --git a/Samples/3_CUDA_Features/ptxjit/ptxjit_vs2019.vcxproj b/Samples/3_CUDA_Features/ptxjit/ptxjit_vs2019.vcxproj index 9d3b8c11..d0c152c8 100644 --- a/Samples/3_CUDA_Features/ptxjit/ptxjit_vs2019.vcxproj +++ b/Samples/3_CUDA_Features/ptxjit/ptxjit_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -107,6 +107,6 @@ - + diff --git a/Samples/3_CUDA_Features/ptxjit/ptxjit_vs2022.vcxproj b/Samples/3_CUDA_Features/ptxjit/ptxjit_vs2022.vcxproj index f8eecb9f..c4dbf912 100644 --- a/Samples/3_CUDA_Features/ptxjit/ptxjit_vs2022.vcxproj +++ b/Samples/3_CUDA_Features/ptxjit/ptxjit_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -107,6 +107,6 @@ - + diff --git a/Samples/3_CUDA_Features/simpleCudaGraphs/Makefile b/Samples/3_CUDA_Features/simpleCudaGraphs/Makefile index a64fbbf6..d956e9b4 100644 --- a/Samples/3_CUDA_Features/simpleCudaGraphs/Makefile +++ b/Samples/3_CUDA_Features/simpleCudaGraphs/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/3_CUDA_Features/simpleCudaGraphs/NsightEclipse.xml b/Samples/3_CUDA_Features/simpleCudaGraphs/NsightEclipse.xml index 0b60949b..56db08fb 100644 --- a/Samples/3_CUDA_Features/simpleCudaGraphs/NsightEclipse.xml +++ b/Samples/3_CUDA_Features/simpleCudaGraphs/NsightEclipse.xml @@ -3,37 +3,37 @@ simpleCudaGraphs - cudaGraphAddMemsetNode - cudaGraphsUsingStreamCapture - cudaMemcpyAsync - cudaGraphGetNodes - cudaStreamDestroy - cudaMallocHost cudaGraphClone - 
cudaEventCreate - cudaPitchedPtr - cudaGraphCreate - cudaMalloc - cudaPos + cudaExtent + cudaGraphLaunch + cudaStreamCreate + cudaLaunchHostFunc cudaGraphAddMemcpyNode + cudaMallocHost + cudaPitchedPtr cudaStreamEndCapture - cudaGraphExecDestroy - cudaStreamBeginCapture + cudaGraphCreate + cudaFreeHost + cudaGraphGetNodes + cudaMemsetAsync + cudaMemcpyAsync cudaGraphAddHostNode + cudaGraphInstantiate + cudaStreamDestroy + cudaStreamBeginCapture + cudaStreamWaitEvent + cudaEventCreate + cudaMalloc + cudaFree + cudaPos + cudaGraphAddKernelNode + cudaGraphDestroy + cudaEventRecord cudaGraphsManual cudaStreamSynchronize - cudaGraphDestroy - cudaGraphLaunch - cudaStreamWaitEvent - cudaFree - cudaEventRecord - cudaStreamCreate - cudaGraphInstantiate - cudaLaunchHostFunc - cudaExtent - cudaMemsetAsync - cudaFreeHost - cudaGraphAddKernelNode + cudaGraphAddMemsetNode + cudaGraphsUsingStreamCapture + cudaGraphExecDestroy whole @@ -73,6 +73,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/3_CUDA_Features/simpleCudaGraphs/README.md b/Samples/3_CUDA_Features/simpleCudaGraphs/README.md index 9a2c9249..b421b2fb 100644 --- a/Samples/3_CUDA_Features/simpleCudaGraphs/README.md +++ b/Samples/3_CUDA_Features/simpleCudaGraphs/README.md @@ -10,7 +10,7 @@ CUDA Graphs, Stream Capture ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 
](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaGraphAddMemsetNode, cudaGraphsUsingStreamCapture, cudaMemcpyAsync, cudaGraphGetNodes, cudaStreamDestroy, cudaMallocHost, cudaGraphClone, cudaEventCreate, cudaPitchedPtr, cudaGraphCreate, cudaMalloc, cudaPos, cudaGraphAddMemcpyNode, cudaStreamEndCapture, cudaGraphExecDestroy, cudaStreamBeginCapture, cudaGraphAddHostNode, cudaGraphsManual, cudaStreamSynchronize, cudaGraphDestroy, cudaGraphLaunch, cudaStreamWaitEvent, cudaFree, cudaEventRecord, cudaStreamCreate, cudaGraphInstantiate, cudaLaunchHostFunc, cudaExtent, cudaMemsetAsync, cudaFreeHost, cudaGraphAddKernelNode +cudaGraphClone, cudaExtent, cudaGraphLaunch, cudaStreamCreate, cudaLaunchHostFunc, cudaGraphAddMemcpyNode, cudaMallocHost, cudaPitchedPtr, cudaStreamEndCapture, cudaGraphCreate, cudaFreeHost, cudaGraphGetNodes, cudaMemsetAsync, cudaMemcpyAsync, cudaGraphAddHostNode, cudaGraphInstantiate, cudaStreamDestroy, cudaStreamBeginCapture, cudaStreamWaitEvent, cudaEventCreate, cudaMalloc, cudaFree, cudaPos, cudaGraphAddKernelNode, cudaGraphDestroy, cudaEventRecord, cudaGraphsManual, cudaStreamSynchronize, cudaGraphAddMemsetNode, cudaGraphsUsingStreamCapture, 
cudaGraphExecDestroy ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/3_CUDA_Features/simpleCudaGraphs/simpleCudaGraphs_vs2017.vcxproj b/Samples/3_CUDA_Features/simpleCudaGraphs/simpleCudaGraphs_vs2017.vcxproj index d9e1f37e..a9525b01 100644 --- a/Samples/3_CUDA_Features/simpleCudaGraphs/simpleCudaGraphs_vs2017.vcxproj +++ b/Samples/3_CUDA_Features/simpleCudaGraphs/simpleCudaGraphs_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleCudaGraphs.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/3_CUDA_Features/simpleCudaGraphs/simpleCudaGraphs_vs2019.vcxproj b/Samples/3_CUDA_Features/simpleCudaGraphs/simpleCudaGraphs_vs2019.vcxproj index 7d3dfb84..168b88ab 100644 --- a/Samples/3_CUDA_Features/simpleCudaGraphs/simpleCudaGraphs_vs2019.vcxproj +++ b/Samples/3_CUDA_Features/simpleCudaGraphs/simpleCudaGraphs_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleCudaGraphs.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 
./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/3_CUDA_Features/simpleCudaGraphs/simpleCudaGraphs_vs2022.vcxproj b/Samples/3_CUDA_Features/simpleCudaGraphs/simpleCudaGraphs_vs2022.vcxproj index b41246b0..58840c80 100644 --- a/Samples/3_CUDA_Features/simpleCudaGraphs/simpleCudaGraphs_vs2022.vcxproj +++ b/Samples/3_CUDA_Features/simpleCudaGraphs/simpleCudaGraphs_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleCudaGraphs.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/3_CUDA_Features/tf32TensorCoreGemm/Makefile b/Samples/3_CUDA_Features/tf32TensorCoreGemm/Makefile index eee8e843..a5c51424 100644 --- a/Samples/3_CUDA_Features/tf32TensorCoreGemm/Makefile +++ b/Samples/3_CUDA_Features/tf32TensorCoreGemm/Makefile @@ -322,9 +322,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 80 86 87 +SMS ?= 80 86 87 90 else -SMS ?= 80 86 +SMS ?= 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/3_CUDA_Features/tf32TensorCoreGemm/NsightEclipse.xml b/Samples/3_CUDA_Features/tf32TensorCoreGemm/NsightEclipse.xml index 8213c761..f21f3895 100644 --- a/Samples/3_CUDA_Features/tf32TensorCoreGemm/NsightEclipse.xml +++ b/Samples/3_CUDA_Features/tf32TensorCoreGemm/NsightEclipse.xml @@ -7,18 +7,18 @@ --maxrregcount=128 - cudaMemset - cudaFree - cudaEventRecord - cudaEventCreate - cudaFuncSetAttribute - cudaEventElapsedTime - cudaEventSynchronize - cudaMalloc - cudaGetLastError cudaMemcpy + cudaFree cudaGetErrorString + cudaGetLastError + cudaEventSynchronize + cudaFuncSetAttribute + 
cudaEventRecord + cudaMemset + cudaMalloc + cudaEventElapsedTime cudaGetDeviceProperties + cudaEventCreate whole @@ -53,6 +53,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/3_CUDA_Features/tf32TensorCoreGemm/README.md b/Samples/3_CUDA_Features/tf32TensorCoreGemm/README.md index 343eb6de..d7f41f68 100644 --- a/Samples/3_CUDA_Features/tf32TensorCoreGemm/README.md +++ b/Samples/3_CUDA_Features/tf32TensorCoreGemm/README.md @@ -10,7 +10,7 @@ Matrix Multiply, WMMA, Tensor Cores ## Supported SM Architectures -[SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemset, cudaFree, cudaEventRecord, cudaEventCreate, cudaFuncSetAttribute, cudaEventElapsedTime, cudaEventSynchronize, cudaMalloc, cudaGetLastError, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties +cudaMemcpy, cudaFree, cudaGetErrorString, cudaGetLastError, cudaEventSynchronize, cudaFuncSetAttribute, cudaEventRecord, cudaMemset, cudaMalloc, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate ## Dependencies needed to build/run [CPP11](../../../README.md#cpp11) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/3_CUDA_Features/tf32TensorCoreGemm/tf32TensorCoreGemm_vs2017.vcxproj b/Samples/3_CUDA_Features/tf32TensorCoreGemm/tf32TensorCoreGemm_vs2017.vcxproj index 6b002d88..2948b39d 100644 --- a/Samples/3_CUDA_Features/tf32TensorCoreGemm/tf32TensorCoreGemm_vs2017.vcxproj +++ b/Samples/3_CUDA_Features/tf32TensorCoreGemm/tf32TensorCoreGemm_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/tf32TensorCoreGemm.exe - compute_80,sm_80;compute_86,sm_86; + compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/3_CUDA_Features/tf32TensorCoreGemm/tf32TensorCoreGemm_vs2019.vcxproj b/Samples/3_CUDA_Features/tf32TensorCoreGemm/tf32TensorCoreGemm_vs2019.vcxproj index 4166d39e..7a06218b 100644 --- a/Samples/3_CUDA_Features/tf32TensorCoreGemm/tf32TensorCoreGemm_vs2019.vcxproj +++ b/Samples/3_CUDA_Features/tf32TensorCoreGemm/tf32TensorCoreGemm_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/tf32TensorCoreGemm.exe - compute_80,sm_80;compute_86,sm_86; + compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/3_CUDA_Features/tf32TensorCoreGemm/tf32TensorCoreGemm_vs2022.vcxproj b/Samples/3_CUDA_Features/tf32TensorCoreGemm/tf32TensorCoreGemm_vs2022.vcxproj index c4534b65..f7b1d1a0 100644 --- a/Samples/3_CUDA_Features/tf32TensorCoreGemm/tf32TensorCoreGemm_vs2022.vcxproj +++ b/Samples/3_CUDA_Features/tf32TensorCoreGemm/tf32TensorCoreGemm_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/tf32TensorCoreGemm.exe - compute_80,sm_80;compute_86,sm_86; + compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/Makefile b/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/Makefile index 
e743176e..aa25f151 100644 --- a/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/Makefile +++ b/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/Makefile @@ -304,9 +304,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/NsightEclipse.xml b/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/NsightEclipse.xml index 7f5952b9..f0457b12 100644 --- a/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/NsightEclipse.xml +++ b/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/NsightEclipse.xml @@ -6,11 +6,11 @@ --std=c++11 - cudaMemset + cudaMemcpy cudaFree cudaDeviceGetAttribute + cudaMemset cudaMalloc - cudaMemcpy @@ -50,6 +50,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/README.md b/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/README.md index 69c27bdd..f40b05ef 100644 --- a/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/README.md +++ b/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/README.md @@ -10,7 +10,7 @@ Cooperative Groups, Atomic Intrinsics ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 
](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l, aarch64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemset, cudaFree, cudaDeviceGetAttribute, cudaMalloc, cudaMemcpy +cudaMemcpy, cudaFree, cudaDeviceGetAttribute, cudaMemset, cudaMalloc ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/warpAggregatedAtomicsCG_vs2017.vcxproj b/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/warpAggregatedAtomicsCG_vs2017.vcxproj index d7339145..0e2d5973 100644 --- a/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/warpAggregatedAtomicsCG_vs2017.vcxproj +++ b/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/warpAggregatedAtomicsCG_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/warpAggregatedAtomicsCG.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/warpAggregatedAtomicsCG_vs2019.vcxproj b/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/warpAggregatedAtomicsCG_vs2019.vcxproj index 36bd9bca..a15057bb 100644 --- a/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/warpAggregatedAtomicsCG_vs2019.vcxproj +++ b/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/warpAggregatedAtomicsCG_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/warpAggregatedAtomicsCG.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/warpAggregatedAtomicsCG_vs2022.vcxproj 
b/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/warpAggregatedAtomicsCG_vs2022.vcxproj index ff0902b4..8567bb36 100644 --- a/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/warpAggregatedAtomicsCG_vs2022.vcxproj +++ b/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/warpAggregatedAtomicsCG_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/warpAggregatedAtomicsCG.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/FilterBorderControlNPP_vs2017.vcxproj b/Samples/4_CUDA_Libraries/FilterBorderControlNPP/FilterBorderControlNPP_vs2017.vcxproj index 695d8b1a..61a190ad 100644 --- a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/FilterBorderControlNPP_vs2017.vcxproj +++ b/Samples/4_CUDA_Libraries/FilterBorderControlNPP/FilterBorderControlNPP_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -117,6 +117,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/FilterBorderControlNPP_vs2019.vcxproj b/Samples/4_CUDA_Libraries/FilterBorderControlNPP/FilterBorderControlNPP_vs2019.vcxproj index 702ecd0a..438dd31e 100644 --- a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/FilterBorderControlNPP_vs2019.vcxproj +++ b/Samples/4_CUDA_Libraries/FilterBorderControlNPP/FilterBorderControlNPP_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -113,6 +113,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/FilterBorderControlNPP_vs2022.vcxproj b/Samples/4_CUDA_Libraries/FilterBorderControlNPP/FilterBorderControlNPP_vs2022.vcxproj index 0e5f2929..86780849 100644 --- 
a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/FilterBorderControlNPP_vs2022.vcxproj +++ b/Samples/4_CUDA_Libraries/FilterBorderControlNPP/FilterBorderControlNPP_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -113,6 +113,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/NsightEclipse.xml b/Samples/4_CUDA_Libraries/FilterBorderControlNPP/NsightEclipse.xml index cc161f14..60bd7e9d 100644 --- a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/FilterBorderControlNPP/NsightEclipse.xml @@ -3,12 +3,12 @@ FilterBorderControlNPP - cudaDeviceReset - cudaGetDeviceCount - cudaDriverGetVersion - cudaDeviceInit - cudaSetDevice cudaRuntimeGetVersion + cudaDeviceReset + cudaSetDevice + cudaGetDeviceCount + cudaDeviceInit + cudaDriverGetVersion cudaGetDeviceProperties @@ -68,6 +68,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/README.md b/Samples/4_CUDA_Libraries/FilterBorderControlNPP/README.md index 23f2bfb8..54a652ad 100644 --- a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/README.md +++ b/Samples/4_CUDA_Libraries/FilterBorderControlNPP/README.md @@ -10,7 +10,7 @@ Performance Strategies, Image Processing, NPP Library ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 
](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaDeviceReset, cudaGetDeviceCount, cudaDriverGetVersion, cudaDeviceInit, cudaSetDevice, cudaRuntimeGetVersion, cudaGetDeviceProperties +cudaRuntimeGetVersion, cudaDeviceReset, cudaSetDevice, cudaGetDeviceCount, cudaDeviceInit, cudaDriverGetVersion, cudaGetDeviceProperties ## Dependencies needed to build/run [FreeImage](../../../README.md#freeimage), [NPP](../../../README.md#npp) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/MersenneTwisterGP11213_vs2017.vcxproj b/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/MersenneTwisterGP11213_vs2017.vcxproj index dd819226..17822bcb 100644 --- a/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/MersenneTwisterGP11213_vs2017.vcxproj +++ b/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/MersenneTwisterGP11213_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -107,6 +107,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/MersenneTwisterGP11213_vs2019.vcxproj b/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/MersenneTwisterGP11213_vs2019.vcxproj index d2ac6df2..75322608 100644 --- a/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/MersenneTwisterGP11213_vs2019.vcxproj +++ b/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/MersenneTwisterGP11213_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/MersenneTwisterGP11213_vs2022.vcxproj b/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/MersenneTwisterGP11213_vs2022.vcxproj index 1de5e019..a40090f1 100644 --- a/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/MersenneTwisterGP11213_vs2022.vcxproj +++ b/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/MersenneTwisterGP11213_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/NsightEclipse.xml b/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/NsightEclipse.xml index 40c95ec0..34ed799b 100644 --- a/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/NsightEclipse.xml @@ -3,13 +3,13 @@ MersenneTwisterGP11213 + cudaStreamCreateWithFlags cudaStreamDestroy cudaFree cudaMallocHost cudaFreeHost - cudaMalloc - cudaStreamCreateWithFlags cudaStreamSynchronize + cudaMalloc cudaMemcpyAsync @@ -58,6 +58,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git 
a/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/README.md b/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/README.md index 9af50ae2..6244164c 100644 --- a/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/README.md +++ b/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/README.md @@ -10,7 +10,7 @@ CURAND Library ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaStreamDestroy, cudaFree, cudaMallocHost, cudaFreeHost, cudaMalloc, cudaStreamCreateWithFlags, cudaStreamSynchronize, cudaMemcpyAsync 
+cudaStreamCreateWithFlags, cudaStreamDestroy, cudaFree, cudaMallocHost, cudaFreeHost, cudaStreamSynchronize, cudaMalloc, cudaMemcpyAsync ## Dependencies needed to build/run [CURAND](../../../README.md#curand) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/4_CUDA_Libraries/batchCUBLAS/NsightEclipse.xml b/Samples/4_CUDA_Libraries/batchCUBLAS/NsightEclipse.xml index e166cad3..44f306e2 100644 --- a/Samples/4_CUDA_Libraries/batchCUBLAS/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/batchCUBLAS/NsightEclipse.xml @@ -5,15 +5,15 @@ cuRand cuEqual - cudaFree - cudaStreamCreate - cudaDeviceSynchronize - cudaMalloc - cudaGetLastError cudaMemcpy cudaGetErrorString - cudaGetDeviceProperties + cudaFree + cudaGetLastError + cudaDeviceSynchronize cudaGetDevice + cudaMalloc + cudaStreamCreate + cudaGetDeviceProperties whole @@ -58,6 +58,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/4_CUDA_Libraries/batchCUBLAS/README.md b/Samples/4_CUDA_Libraries/batchCUBLAS/README.md index ade22850..63575022 100644 --- a/Samples/4_CUDA_Libraries/batchCUBLAS/README.md +++ b/Samples/4_CUDA_Libraries/batchCUBLAS/README.md @@ -10,7 +10,7 @@ Linear Algebra, CUBLAS Library ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 
](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -26,14 +26,14 @@ x86_64, ppc64le, armv7l cuRand, cuEqual ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaStreamCreate, cudaDeviceSynchronize, cudaMalloc, cudaGetLastError, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties, cudaGetDevice +cudaMemcpy, cudaGetErrorString, cudaFree, cudaGetLastError, cudaDeviceSynchronize, cudaGetDevice, cudaMalloc, cudaStreamCreate, cudaGetDeviceProperties ## Dependencies needed to build/run [CUBLAS](../../../README.md#cublas) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/4_CUDA_Libraries/batchCUBLAS/batchCUBLAS_vs2017.vcxproj b/Samples/4_CUDA_Libraries/batchCUBLAS/batchCUBLAS_vs2017.vcxproj index 8df8d495..2c41bea3 100644 --- a/Samples/4_CUDA_Libraries/batchCUBLAS/batchCUBLAS_vs2017.vcxproj +++ b/Samples/4_CUDA_Libraries/batchCUBLAS/batchCUBLAS_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -107,6 +107,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/batchCUBLAS/batchCUBLAS_vs2019.vcxproj b/Samples/4_CUDA_Libraries/batchCUBLAS/batchCUBLAS_vs2019.vcxproj index 3f42f964..1bd17bac 100644 --- a/Samples/4_CUDA_Libraries/batchCUBLAS/batchCUBLAS_vs2019.vcxproj +++ b/Samples/4_CUDA_Libraries/batchCUBLAS/batchCUBLAS_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/batchCUBLAS/batchCUBLAS_vs2022.vcxproj b/Samples/4_CUDA_Libraries/batchCUBLAS/batchCUBLAS_vs2022.vcxproj index 1823a30b..dba07dfd 100644 --- a/Samples/4_CUDA_Libraries/batchCUBLAS/batchCUBLAS_vs2022.vcxproj +++ b/Samples/4_CUDA_Libraries/batchCUBLAS/batchCUBLAS_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/NsightEclipse.xml b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/NsightEclipse.xml index 821f1eea..9cd72dd8 100644 --- a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/NsightEclipse.xml @@ -3,19 +3,19 @@ batchedLabelMarkersAndLabelCompressionNPP - cudaFree - cudaMallocHost - cudaMallocPitch - cudaDeviceGetAttribute - cudaFreeHost - cudaDriverGetVersion - cudaMalloc - cudaStreamGetFlags cudaRuntimeGetVersion + cudaMallocPitch + cudaFree + cudaDeviceGetAttribute + cudaMallocHost + cudaDriverGetVersion + cudaFreeHost + cudaGetDevice + cudaStreamGetFlags cudaStreamSynchronize + cudaMalloc cudaMemcpyAsync cudaGetDeviceProperties - cudaGetDevice whole @@ -68,6 +68,7 @@ sm80 sm86 sm87 
+ sm90 x86_64 diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/README.md b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/README.md index 97dbcd1d..ddc106f3 100644 --- a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/README.md +++ b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/README.md @@ -10,7 +10,7 @@ Performance Strategies, Image Processing, NPP Library, Using NPP Batch Functions ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime 
API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaMallocHost, cudaMallocPitch, cudaDeviceGetAttribute, cudaFreeHost, cudaDriverGetVersion, cudaMalloc, cudaStreamGetFlags, cudaRuntimeGetVersion, cudaStreamSynchronize, cudaMemcpyAsync, cudaGetDeviceProperties, cudaGetDevice +cudaRuntimeGetVersion, cudaMallocPitch, cudaFree, cudaDeviceGetAttribute, cudaMallocHost, cudaDriverGetVersion, cudaFreeHost, cudaGetDevice, cudaStreamGetFlags, cudaStreamSynchronize, cudaMalloc, cudaMemcpyAsync, cudaGetDeviceProperties ## Dependencies needed to build/run [NPP](../../../README.md#npp) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/batchedLabelMarkersAndLabelCompressionNPP_vs2017.vcxproj b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/batchedLabelMarkersAndLabelCompressionNPP_vs2017.vcxproj index 59e207a8..da25b507 100644 --- a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/batchedLabelMarkersAndLabelCompressionNPP_vs2017.vcxproj +++ b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/batchedLabelMarkersAndLabelCompressionNPP_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -107,6 +107,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/batchedLabelMarkersAndLabelCompressionNPP_vs2019.vcxproj b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/batchedLabelMarkersAndLabelCompressionNPP_vs2019.vcxproj index 148be7cb..928dc419 100644 --- a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/batchedLabelMarkersAndLabelCompressionNPP_vs2019.vcxproj 
+++ b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/batchedLabelMarkersAndLabelCompressionNPP_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/batchedLabelMarkersAndLabelCompressionNPP_vs2022.vcxproj b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/batchedLabelMarkersAndLabelCompressionNPP_vs2022.vcxproj index 1f5a0ff4..5b9408aa 100644 --- a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/batchedLabelMarkersAndLabelCompressionNPP_vs2022.vcxproj +++ b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/batchedLabelMarkersAndLabelCompressionNPP_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/boxFilterNPP/NsightEclipse.xml b/Samples/4_CUDA_Libraries/boxFilterNPP/NsightEclipse.xml index ae3b9b46..da9c0d55 100644 --- a/Samples/4_CUDA_Libraries/boxFilterNPP/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/boxFilterNPP/NsightEclipse.xml @@ -3,8 +3,8 @@ boxFilterNPP - cudaDriverGetVersion cudaRuntimeGetVersion + cudaDriverGetVersion whole @@ -64,6 +64,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/4_CUDA_Libraries/boxFilterNPP/README.md b/Samples/4_CUDA_Libraries/boxFilterNPP/README.md index 3de69529..f647397c 100644 --- a/Samples/4_CUDA_Libraries/boxFilterNPP/README.md +++ b/Samples/4_CUDA_Libraries/boxFilterNPP/README.md @@ -10,7 +10,7 @@ Performance Strategies, Image Processing, NPP Library ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 
](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaDriverGetVersion, cudaRuntimeGetVersion +cudaRuntimeGetVersion, cudaDriverGetVersion ## Dependencies needed to build/run [FreeImage](../../../README.md#freeimage), [NPP](../../../README.md#npp) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/4_CUDA_Libraries/boxFilterNPP/boxFilterNPP_vs2017.vcxproj b/Samples/4_CUDA_Libraries/boxFilterNPP/boxFilterNPP_vs2017.vcxproj index 693cda83..bf2a30ab 100644 --- a/Samples/4_CUDA_Libraries/boxFilterNPP/boxFilterNPP_vs2017.vcxproj +++ b/Samples/4_CUDA_Libraries/boxFilterNPP/boxFilterNPP_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -117,6 +117,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/boxFilterNPP/boxFilterNPP_vs2019.vcxproj b/Samples/4_CUDA_Libraries/boxFilterNPP/boxFilterNPP_vs2019.vcxproj index 0f7a3fa0..a7ab43bb 100644 --- a/Samples/4_CUDA_Libraries/boxFilterNPP/boxFilterNPP_vs2019.vcxproj +++ b/Samples/4_CUDA_Libraries/boxFilterNPP/boxFilterNPP_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -113,6 +113,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/boxFilterNPP/boxFilterNPP_vs2022.vcxproj b/Samples/4_CUDA_Libraries/boxFilterNPP/boxFilterNPP_vs2022.vcxproj index 1ef674c0..5b3e3fb3 100644 --- a/Samples/4_CUDA_Libraries/boxFilterNPP/boxFilterNPP_vs2022.vcxproj +++ b/Samples/4_CUDA_Libraries/boxFilterNPP/boxFilterNPP_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -113,6 +113,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/NsightEclipse.xml b/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/NsightEclipse.xml index d4dca5ba..26b3f456 100644 --- a/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/NsightEclipse.xml @@ -3,13 +3,13 @@ cannyEdgeDetectorNPP + cudaRuntimeGetVersion cudaFree + cudaSetDevice cudaGetDeviceCount + cudaDeviceInit cudaDriverGetVersion cudaMalloc - cudaDeviceInit - cudaSetDevice - cudaRuntimeGetVersion cudaGetDeviceProperties @@ -66,6 +66,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/README.md b/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/README.md index 41630e5d..01493d71 100644 --- a/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/README.md +++ 
b/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/README.md @@ -10,7 +10,7 @@ Performance Strategies, Image Processing, NPP Library ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaGetDeviceCount, cudaDriverGetVersion, cudaMalloc, cudaDeviceInit, cudaSetDevice, cudaRuntimeGetVersion, cudaGetDeviceProperties +cudaRuntimeGetVersion, cudaFree, cudaSetDevice, cudaGetDeviceCount, cudaDeviceInit, cudaDriverGetVersion, cudaMalloc, cudaGetDeviceProperties ## Dependencies needed to build/run 
[FreeImage](../../../README.md#freeimage), [NPP](../../../README.md#npp) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/cannyEdgeDetectorNPP_vs2017.vcxproj b/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/cannyEdgeDetectorNPP_vs2017.vcxproj index a2fe1e4c..338a498d 100644 --- a/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/cannyEdgeDetectorNPP_vs2017.vcxproj +++ b/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/cannyEdgeDetectorNPP_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -117,6 +117,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/cannyEdgeDetectorNPP_vs2019.vcxproj b/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/cannyEdgeDetectorNPP_vs2019.vcxproj index 0510328c..91164159 100644 --- a/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/cannyEdgeDetectorNPP_vs2019.vcxproj +++ b/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/cannyEdgeDetectorNPP_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -113,6 +113,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/cannyEdgeDetectorNPP_vs2022.vcxproj b/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/cannyEdgeDetectorNPP_vs2022.vcxproj index 29e20532..31270caa 100644 --- a/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/cannyEdgeDetectorNPP_vs2022.vcxproj +++ b/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/cannyEdgeDetectorNPP_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -113,6 +113,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/conjugateGradient/NsightEclipse.xml b/Samples/4_CUDA_Libraries/conjugateGradient/NsightEclipse.xml index 7fb14579..99a85b35 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradient/NsightEclipse.xml +++ 
b/Samples/4_CUDA_Libraries/conjugateGradient/NsightEclipse.xml @@ -3,10 +3,10 @@ conjugateGradient + cudaMemcpy cudaFree cudaDeviceSynchronize cudaMalloc - cudaMemcpy cudaGetDeviceProperties @@ -59,6 +59,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/4_CUDA_Libraries/conjugateGradient/README.md b/Samples/4_CUDA_Libraries/conjugateGradient/README.md index c8521987..9f664782 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradient/README.md +++ b/Samples/4_CUDA_Libraries/conjugateGradient/README.md @@ -10,7 +10,7 @@ Linear Algebra, CUBLAS Library, CUSPARSE Library ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ 
x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaMemcpy, cudaGetDeviceProperties +cudaMemcpy, cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaGetDeviceProperties ## Dependencies needed to build/run [CUBLAS](../../../README.md#cublas), [CUSPARSE](../../../README.md#cusparse) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/4_CUDA_Libraries/conjugateGradient/conjugateGradient_vs2017.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradient/conjugateGradient_vs2017.vcxproj index 3f9f1fc8..42096e5a 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradient/conjugateGradient_vs2017.vcxproj +++ b/Samples/4_CUDA_Libraries/conjugateGradient/conjugateGradient_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -107,6 +107,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/conjugateGradient/conjugateGradient_vs2019.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradient/conjugateGradient_vs2019.vcxproj index 9755b7c0..44cdb92c 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradient/conjugateGradient_vs2019.vcxproj +++ b/Samples/4_CUDA_Libraries/conjugateGradient/conjugateGradient_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/conjugateGradient/conjugateGradient_vs2022.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradient/conjugateGradient_vs2022.vcxproj index 2d9bb918..afb56e1d 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradient/conjugateGradient_vs2022.vcxproj +++ b/Samples/4_CUDA_Libraries/conjugateGradient/conjugateGradient_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -103,6 +103,6 @@ - + 
diff --git a/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/Makefile b/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/Makefile index af3ffc45..4c4c95d5 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/Makefile +++ b/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/NsightEclipse.xml b/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/NsightEclipse.xml index 3a4d5e98..0a766802 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/NsightEclipse.xml @@ -3,23 +3,23 @@ conjugateGradientCudaGraphs - cudaStreamDestroy - cudaFree - cudaGraphExecDestroy - cudaMallocHost - cudaStreamCreate cudaGraphInstantiate - cudaOccupancyMaxPotentialBlockSize + cudaStreamDestroy cudaStreamBeginCapture - cudaMemsetAsync + cudaFree + cudaMallocHost cudaStreamEndCapture - cudaFreeHost - cudaMalloc - cudaStreamSynchronize - cudaMemcpyAsync cudaGraphDestroy - cudaGetDeviceProperties + cudaFreeHost cudaGraphLaunch + cudaStreamCreate + cudaStreamSynchronize + cudaOccupancyMaxPotentialBlockSize + cudaMalloc + cudaMemcpyAsync + cudaMemsetAsync + cudaGetDeviceProperties + cudaGraphExecDestroy whole @@ -71,6 +71,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/README.md b/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/README.md index 8d634cde..787c89a4 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/README.md +++ b/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/README.md @@ -10,7 +10,7 @@ Linear Algebra, CUBLAS Library, CUSPARSE Library 
## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaStreamDestroy, cudaFree, cudaGraphExecDestroy, cudaMallocHost, cudaStreamCreate, cudaGraphInstantiate, cudaOccupancyMaxPotentialBlockSize, cudaStreamBeginCapture, cudaMemsetAsync, cudaStreamEndCapture, cudaFreeHost, cudaMalloc, cudaStreamSynchronize, cudaMemcpyAsync, cudaGraphDestroy, cudaGetDeviceProperties, cudaGraphLaunch +cudaGraphInstantiate, cudaStreamDestroy, cudaStreamBeginCapture, cudaFree, cudaMallocHost, cudaStreamEndCapture, 
cudaGraphDestroy, cudaFreeHost, cudaGraphLaunch, cudaStreamCreate, cudaStreamSynchronize, cudaOccupancyMaxPotentialBlockSize, cudaMalloc, cudaMemcpyAsync, cudaMemsetAsync, cudaGetDeviceProperties, cudaGraphExecDestroy ## Dependencies needed to build/run [CUBLAS](../../../README.md#cublas), [CUSPARSE](../../../README.md#cusparse) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/conjugateGradientCudaGraphs_vs2017.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/conjugateGradientCudaGraphs_vs2017.vcxproj index 8bd52098..2a1e00ec 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/conjugateGradientCudaGraphs_vs2017.vcxproj +++ b/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/conjugateGradientCudaGraphs_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/conjugateGradientCudaGraphs.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/conjugateGradientCudaGraphs_vs2019.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/conjugateGradientCudaGraphs_vs2019.vcxproj index 1838fc79..c1e36760 100644 --- 
a/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/conjugateGradientCudaGraphs_vs2019.vcxproj +++ b/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/conjugateGradientCudaGraphs_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/conjugateGradientCudaGraphs.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/conjugateGradientCudaGraphs_vs2022.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/conjugateGradientCudaGraphs_vs2022.vcxproj index b07dfc7b..f641b210 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/conjugateGradientCudaGraphs_vs2022.vcxproj +++ b/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/conjugateGradientCudaGraphs_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/conjugateGradientCudaGraphs.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/Makefile b/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/Makefile index 2cd57b05..6e5d3435 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/Makefile +++ 
b/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/Makefile @@ -322,9 +322,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 61 70 72 75 80 86 87 +SMS ?= 61 70 72 75 80 86 87 90 else -SMS ?= 60 61 70 75 80 86 +SMS ?= 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/NsightEclipse.xml b/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/NsightEclipse.xml index 55c72749..0deaa733 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/NsightEclipse.xml @@ -7,15 +7,15 @@ cudaFree - cudaEventRecord cudaMallocManaged - cudaOccupancyMaxActiveBlocksPerMultiprocessor - cudaEventCreate - cudaEventElapsedTime cudaDeviceSynchronize - cudaEventDestroy + cudaEventRecord cudaLaunchCooperativeKernel + cudaEventDestroy + cudaEventElapsedTime + cudaOccupancyMaxActiveBlocksPerMultiprocessor cudaGetDeviceProperties + cudaEventCreate whole @@ -60,6 +60,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/README.md b/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/README.md index 804955b6..e8c0643c 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/README.md +++ b/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/README.md @@ -10,7 +10,7 @@ Unified Memory, Linear Algebra, Cooperative Groups, MultiBlock Cooperative Group ## Supported SM Architectures -[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 
6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaEventRecord, cudaMallocManaged, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaEventCreate, cudaEventElapsedTime, cudaDeviceSynchronize, cudaEventDestroy, cudaLaunchCooperativeKernel, cudaGetDeviceProperties +cudaFree, cudaMallocManaged, cudaDeviceSynchronize, cudaEventRecord, cudaLaunchCooperativeKernel, cudaEventDestroy, cudaEventElapsedTime, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaGetDeviceProperties, cudaEventCreate ## Dependencies needed to build/run [UVM](../../../README.md#uvm), [MBCG](../../../README.md#mbcg) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/conjugateGradientMultiBlockCG_vs2017.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/conjugateGradientMultiBlockCG_vs2017.vcxproj index 0fa1a17e..d3fc2fdf 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/conjugateGradientMultiBlockCG_vs2017.vcxproj +++ b/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/conjugateGradientMultiBlockCG_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/conjugateGradientMultiBlockCG.exe - compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/conjugateGradientMultiBlockCG_vs2019.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/conjugateGradientMultiBlockCG_vs2019.vcxproj index 7301e032..6c064a95 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/conjugateGradientMultiBlockCG_vs2019.vcxproj +++ b/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/conjugateGradientMultiBlockCG_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/conjugateGradientMultiBlockCG.exe - compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/conjugateGradientMultiBlockCG_vs2022.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/conjugateGradientMultiBlockCG_vs2022.vcxproj index 57816f0b..0e4d81be 100644 --- 
a/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/conjugateGradientMultiBlockCG_vs2022.vcxproj +++ b/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/conjugateGradientMultiBlockCG_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/conjugateGradientMultiBlockCG.exe - compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/Makefile b/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/Makefile index 06f2703e..31713181 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/Makefile +++ b/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/Makefile @@ -322,9 +322,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 61 70 72 75 80 86 87 +SMS ?= 61 70 72 75 80 86 87 90 else -SMS ?= 60 61 70 75 80 86 +SMS ?= 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/NsightEclipse.xml b/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/NsightEclipse.xml index 7852487a..ef83507c 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/NsightEclipse.xml @@ -8,22 +8,22 @@ --std=c++11 - cudaDeviceEnablePeerAccess - cudaMemset - cudaFree - cudaMallocManaged - cudaMemPrefetchAsync cudaHostAlloc - cudaOccupancyMaxActiveBlocksPerMultiprocessor - cudaStreamCreate - cudaGetDeviceCount - cudaFreeHost - cudaSetDevice - cudaDeviceCanAccessPeer + cudaMemPrefetchAsync + cudaFree cudaLaunchCooperativeKernel - cudaStreamSynchronize - cudaMemAdvise + cudaMallocManaged + cudaSetDevice + cudaGetDeviceCount cudaGetDeviceProperties 
+ cudaFreeHost + cudaMemset + cudaStreamCreate + cudaStreamSynchronize + cudaDeviceEnablePeerAccess + cudaMemAdvise + cudaOccupancyMaxActiveBlocksPerMultiprocessor + cudaDeviceCanAccessPeer whole @@ -71,6 +71,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/README.md b/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/README.md index e21a3507..9d77bf38 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/README.md +++ b/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/README.md @@ -10,7 +10,7 @@ Unified Memory, Linear Algebra, Cooperative Groups, MultiDevice Cooperative Grou ## Supported SM Architectures -[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaDeviceEnablePeerAccess, cudaMemset, cudaFree, cudaMallocManaged, cudaMemPrefetchAsync, cudaHostAlloc, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaStreamCreate, cudaGetDeviceCount, cudaFreeHost, cudaSetDevice, cudaDeviceCanAccessPeer, cudaLaunchCooperativeKernel, 
cudaStreamSynchronize, cudaMemAdvise, cudaGetDeviceProperties +cudaHostAlloc, cudaMemPrefetchAsync, cudaFree, cudaLaunchCooperativeKernel, cudaMallocManaged, cudaSetDevice, cudaGetDeviceCount, cudaGetDeviceProperties, cudaFreeHost, cudaMemset, cudaStreamCreate, cudaStreamSynchronize, cudaDeviceEnablePeerAccess, cudaMemAdvise, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaDeviceCanAccessPeer ## Dependencies needed to build/run [UVM](../../../README.md#uvm), [MDCG](../../../README.md#mdcg), [CPP11](../../../README.md#cpp11) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG_vs2017.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG_vs2017.vcxproj index 85930d36..b58051c3 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG_vs2017.vcxproj +++ b/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/conjugateGradientMultiDeviceCG.exe - compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -108,6 +108,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG_vs2019.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG_vs2019.vcxproj index 
4a5df0b8..b422370d 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG_vs2019.vcxproj +++ b/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/conjugateGradientMultiDeviceCG.exe - compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG_vs2022.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG_vs2022.vcxproj index 91ab39cc..41caff0d 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG_vs2022.vcxproj +++ b/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/conjugateGradientMultiDeviceCG.exe - compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/conjugateGradientPrecond/NsightEclipse.xml b/Samples/4_CUDA_Libraries/conjugateGradientPrecond/NsightEclipse.xml index 62d843c0..760d5de3 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientPrecond/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/conjugateGradientPrecond/NsightEclipse.xml @@ -3,10 +3,10 @@ conjugateGradientPrecond - cudaMemset - cudaFree - cudaMalloc cudaMemcpy + cudaFree + cudaMemset + cudaMalloc cudaGetDeviceProperties @@ -57,6 +57,7 @@ 
sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/4_CUDA_Libraries/conjugateGradientPrecond/README.md b/Samples/4_CUDA_Libraries/conjugateGradientPrecond/README.md index 6e1116d5..bded9817 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientPrecond/README.md +++ b/Samples/4_CUDA_Libraries/conjugateGradientPrecond/README.md @@ -10,7 +10,7 @@ Linear Algebra, CUBLAS Library, CUSPARSE Library ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemset, cudaFree, cudaMalloc, cudaMemcpy, 
cudaGetDeviceProperties +cudaMemcpy, cudaFree, cudaMemset, cudaMalloc, cudaGetDeviceProperties ## Dependencies needed to build/run [CUBLAS](../../../README.md#cublas), [CUSPARSE](../../../README.md#cusparse) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/4_CUDA_Libraries/conjugateGradientPrecond/conjugateGradientPrecond_vs2017.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradientPrecond/conjugateGradientPrecond_vs2017.vcxproj index bd750e90..3af1df6d 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientPrecond/conjugateGradientPrecond_vs2017.vcxproj +++ b/Samples/4_CUDA_Libraries/conjugateGradientPrecond/conjugateGradientPrecond_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -107,6 +107,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/conjugateGradientPrecond/conjugateGradientPrecond_vs2019.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradientPrecond/conjugateGradientPrecond_vs2019.vcxproj index 593817cd..0721d9eb 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientPrecond/conjugateGradientPrecond_vs2019.vcxproj +++ b/Samples/4_CUDA_Libraries/conjugateGradientPrecond/conjugateGradientPrecond_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/conjugateGradientPrecond/conjugateGradientPrecond_vs2022.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradientPrecond/conjugateGradientPrecond_vs2022.vcxproj index d368d236..e601f5fd 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientPrecond/conjugateGradientPrecond_vs2022.vcxproj +++ b/Samples/4_CUDA_Libraries/conjugateGradientPrecond/conjugateGradientPrecond_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -103,6 +103,6 @@ - + diff --git 
a/Samples/4_CUDA_Libraries/conjugateGradientUM/Makefile b/Samples/4_CUDA_Libraries/conjugateGradientUM/Makefile index b16c5e68..8f72576d 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientUM/Makefile +++ b/Samples/4_CUDA_Libraries/conjugateGradientUM/Makefile @@ -285,9 +285,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/4_CUDA_Libraries/conjugateGradientUM/NsightEclipse.xml b/Samples/4_CUDA_Libraries/conjugateGradientUM/NsightEclipse.xml index 3e59ba5d..ca7258c9 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientUM/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/conjugateGradientUM/NsightEclipse.xml @@ -59,6 +59,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/4_CUDA_Libraries/conjugateGradientUM/README.md b/Samples/4_CUDA_Libraries/conjugateGradientUM/README.md index bb3253f1..ac9fd252 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientUM/README.md +++ b/Samples/4_CUDA_Libraries/conjugateGradientUM/README.md @@ -10,7 +10,7 @@ Unified Memory, Linear Algebra, CUBLAS Library, CUSPARSE Library ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 
](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -30,7 +30,7 @@ cudaFree, cudaMallocManaged, cudaDeviceSynchronize, cudaMalloc, cudaGetDevicePro ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/4_CUDA_Libraries/conjugateGradientUM/conjugateGradientUM_vs2017.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradientUM/conjugateGradientUM_vs2017.vcxproj index 62f9d13d..4a8f1f5f 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientUM/conjugateGradientUM_vs2017.vcxproj +++ b/Samples/4_CUDA_Libraries/conjugateGradientUM/conjugateGradientUM_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/conjugateGradientUM.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/conjugateGradientUM/conjugateGradientUM_vs2019.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradientUM/conjugateGradientUM_vs2019.vcxproj index a6541e3d..44fd5a52 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientUM/conjugateGradientUM_vs2019.vcxproj +++ b/Samples/4_CUDA_Libraries/conjugateGradientUM/conjugateGradientUM_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/conjugateGradientUM.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/conjugateGradientUM/conjugateGradientUM_vs2022.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradientUM/conjugateGradientUM_vs2022.vcxproj index 
045228e5..05b5205c 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientUM/conjugateGradientUM_vs2022.vcxproj +++ b/Samples/4_CUDA_Libraries/conjugateGradientUM/conjugateGradientUM_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/conjugateGradientUM.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/cuDLAErrorReporting/Makefile b/Samples/4_CUDA_Libraries/cuDLAErrorReporting/Makefile index 19b23678..9c8f53fe 100644 --- a/Samples/4_CUDA_Libraries/cuDLAErrorReporting/Makefile +++ b/Samples/4_CUDA_Libraries/cuDLAErrorReporting/Makefile @@ -330,9 +330,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/4_CUDA_Libraries/cuDLAErrorReporting/NsightEclipse.xml b/Samples/4_CUDA_Libraries/cuDLAErrorReporting/NsightEclipse.xml index fdf187ae..0d4cf217 100644 --- a/Samples/4_CUDA_Libraries/cuDLAErrorReporting/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/cuDLAErrorReporting/NsightEclipse.xml @@ -6,14 +6,14 @@ --std=c++11 + cudaStreamCreateWithFlags cudaStreamDestroy cudaFree - cudaStreamSynchronize - cudaMemsetAsync - cudaMalloc - cudaStreamCreateWithFlags - cudaSetDevice cudaGetErrorName + cudaSetDevice + cudaStreamSynchronize + cudaMalloc + cudaMemsetAsync cudaMemcpyAsync @@ -51,6 +51,7 @@ sm80 sm86 sm87 + sm90 aarch64 diff --git a/Samples/4_CUDA_Libraries/cuDLAErrorReporting/README.md 
b/Samples/4_CUDA_Libraries/cuDLAErrorReporting/README.md index e4b50ac9..e18dc7d8 100644 --- a/Samples/4_CUDA_Libraries/cuDLAErrorReporting/README.md +++ b/Samples/4_CUDA_Libraries/cuDLAErrorReporting/README.md @@ -10,7 +10,7 @@ cuDLA, Data Parallel Algorithms, Image Processing ## Supported SM Architectures -[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ aarch64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaStreamDestroy, cudaFree, cudaStreamSynchronize, cudaMemsetAsync, cudaMalloc, cudaStreamCreateWithFlags, cudaSetDevice, cudaGetErrorName, cudaMemcpyAsync +cudaStreamCreateWithFlags, cudaStreamDestroy, cudaFree, cudaGetErrorName, cudaSetDevice, cudaStreamSynchronize, cudaMalloc, cudaMemsetAsync, cudaMemcpyAsync ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/4_CUDA_Libraries/cuDLAHybridMode/Makefile b/Samples/4_CUDA_Libraries/cuDLAHybridMode/Makefile index 70ba53d6..5cf7413a 100644 --- a/Samples/4_CUDA_Libraries/cuDLAHybridMode/Makefile +++ b/Samples/4_CUDA_Libraries/cuDLAHybridMode/Makefile @@ -330,9 +330,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/4_CUDA_Libraries/cuDLAHybridMode/NsightEclipse.xml b/Samples/4_CUDA_Libraries/cuDLAHybridMode/NsightEclipse.xml index 07ca19c5..d59cdec6 100644 --- a/Samples/4_CUDA_Libraries/cuDLAHybridMode/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/cuDLAHybridMode/NsightEclipse.xml @@ -6,14 +6,14 @@ --std=c++11 + cudaStreamCreateWithFlags cudaStreamDestroy cudaFree - cudaStreamSynchronize - cudaMemsetAsync - cudaMalloc - cudaStreamCreateWithFlags - cudaSetDevice cudaGetErrorName + cudaSetDevice + cudaStreamSynchronize + cudaMalloc + cudaMemsetAsync cudaMemcpyAsync @@ -51,6 +51,7 @@ sm80 sm86 sm87 + sm90 aarch64 diff --git a/Samples/4_CUDA_Libraries/cuDLAHybridMode/README.md b/Samples/4_CUDA_Libraries/cuDLAHybridMode/README.md index 2f4559bb..12799c18 100644 --- a/Samples/4_CUDA_Libraries/cuDLAHybridMode/README.md +++ b/Samples/4_CUDA_Libraries/cuDLAHybridMode/README.md @@ -10,7 +10,7 @@ cuDLA, Data Parallel Algorithms, Image Processing ## Supported SM Architectures -[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 6.0 
](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ aarch64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaStreamDestroy, cudaFree, cudaStreamSynchronize, cudaMemsetAsync, cudaMalloc, cudaStreamCreateWithFlags, cudaSetDevice, cudaGetErrorName, cudaMemcpyAsync +cudaStreamCreateWithFlags, cudaStreamDestroy, cudaFree, cudaGetErrorName, cudaSetDevice, cudaStreamSynchronize, cudaMalloc, cudaMemsetAsync, cudaMemcpyAsync ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/4_CUDA_Libraries/cuDLAStandaloneMode/Makefile b/Samples/4_CUDA_Libraries/cuDLAStandaloneMode/Makefile index 4893fee9..76bfe834 100644 --- a/Samples/4_CUDA_Libraries/cuDLAStandaloneMode/Makefile +++ b/Samples/4_CUDA_Libraries/cuDLAStandaloneMode/Makefile @@ -333,9 +333,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/4_CUDA_Libraries/cuDLAStandaloneMode/NsightEclipse.xml b/Samples/4_CUDA_Libraries/cuDLAStandaloneMode/NsightEclipse.xml index b9db9e0b..7f3ef809 100644 --- a/Samples/4_CUDA_Libraries/cuDLAStandaloneMode/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/cuDLAStandaloneMode/NsightEclipse.xml @@ -45,6 +45,7 @@ sm80 sm86 sm87 + sm90 aarch64 diff --git a/Samples/4_CUDA_Libraries/cuDLAStandaloneMode/README.md b/Samples/4_CUDA_Libraries/cuDLAStandaloneMode/README.md index 6ca250ba..21cdfb8f 100644 --- a/Samples/4_CUDA_Libraries/cuDLAStandaloneMode/README.md +++ b/Samples/4_CUDA_Libraries/cuDLAStandaloneMode/README.md @@ -10,7 +10,7 @@ cuDLA, Data Parallel Algorithms, Image Processing ## Supported SM Architectures -[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 
](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -27,7 +27,7 @@ aarch64 ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/4_CUDA_Libraries/cuDLAStandaloneMode/findnvsci.mk b/Samples/4_CUDA_Libraries/cuDLAStandaloneMode/findnvsci.mk index 1934e138..0d6d157c 100644 --- a/Samples/4_CUDA_Libraries/cuDLAStandaloneMode/findnvsci.mk +++ b/Samples/4_CUDA_Libraries/cuDLAStandaloneMode/findnvsci.mk @@ -56,6 +56,7 @@ ifeq ("$(TARGET_OS)","linux") RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) 
ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -101,6 +102,11 @@ ifeq ("$(TARGET_OS)","linux") GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif + ifeq ("$(KYLIN)","0") + GLPATH ?= /usr/lib64/nvidia + GLLINK ?= -L/usr/lib64/nvidia + DFLT_PATH ?= /usr/lib64 + endif NVSCIBUFLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libnvscibuf.so -print 2>/dev/null) NVSCISYNCLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libnvscisync.so -print 2>/dev/null) diff --git a/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/NsightEclipse.xml b/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/NsightEclipse.xml index ba8ad201..9405b394 100644 --- a/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/NsightEclipse.xml @@ -5,13 +5,13 @@ cuDoubleComplex cuComplex - cudaMemset - cudaFree - cudaStreamDestroy - cudaStreamCreate - cudaDeviceSynchronize - cudaMalloc cudaMemcpy + cudaStreamDestroy + cudaFree + cudaDeviceSynchronize + cudaMemset + cudaMalloc + cudaStreamCreate whole @@ -68,6 +68,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/README.md b/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/README.md index 92210b8f..0b311943 100644 --- a/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/README.md +++ b/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/README.md @@ -10,7 +10,7 @@ Linear Algebra, CUSOLVER Library ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 
7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -26,14 +26,14 @@ x86_64, ppc64le, aarch64 cuDoubleComplex, cuComplex ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemset, cudaFree, cudaStreamDestroy, cudaStreamCreate, cudaDeviceSynchronize, cudaMalloc, cudaMemcpy +cudaMemcpy, cudaStreamDestroy, cudaFree, cudaDeviceSynchronize, cudaMemset, cudaMalloc, cudaStreamCreate ## Dependencies needed to build/run [CUSOLVER](../../../README.md#cusolver), [CUBLAS](../../../README.md#cublas), [CUSPARSE](../../../README.md#cusparse) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/cuSolverDn_LinearSolver_vs2017.vcxproj b/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/cuSolverDn_LinearSolver_vs2017.vcxproj index 90d7b869..cd859b9b 100644 --- a/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/cuSolverDn_LinearSolver_vs2017.vcxproj +++ b/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/cuSolverDn_LinearSolver_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -109,6 +109,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/cuSolverDn_LinearSolver_vs2019.vcxproj b/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/cuSolverDn_LinearSolver_vs2019.vcxproj index 2b63d7d1..6155be1a 100644 --- a/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/cuSolverDn_LinearSolver_vs2019.vcxproj +++ b/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/cuSolverDn_LinearSolver_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -105,6 +105,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/cuSolverDn_LinearSolver_vs2022.vcxproj b/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/cuSolverDn_LinearSolver_vs2022.vcxproj index 1f123af8..98448924 100644 --- a/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/cuSolverDn_LinearSolver_vs2022.vcxproj +++ b/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/cuSolverDn_LinearSolver_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -105,6 +105,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/cuSolverRf/Makefile b/Samples/4_CUDA_Libraries/cuSolverRf/Makefile index 27824bc2..92fdc984 100644 --- a/Samples/4_CUDA_Libraries/cuSolverRf/Makefile +++ b/Samples/4_CUDA_Libraries/cuSolverRf/Makefile @@ -283,9 +283,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/4_CUDA_Libraries/cuSolverRf/NsightEclipse.xml 
b/Samples/4_CUDA_Libraries/cuSolverRf/NsightEclipse.xml index 2cf3f041..84c56c89 100644 --- a/Samples/4_CUDA_Libraries/cuSolverRf/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/cuSolverRf/NsightEclipse.xml @@ -3,15 +3,15 @@ cuSolverRf + cuGet cuDoubleComplex cuComplex - cuGet + cudaMemcpy cudaStreamDestroy cudaFree - cudaStreamCreate cudaDeviceSynchronize cudaMalloc - cudaMemcpy + cudaStreamCreate whole @@ -67,6 +67,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/4_CUDA_Libraries/cuSolverRf/README.md b/Samples/4_CUDA_Libraries/cuSolverRf/README.md index d0a99cf4..c268cf07 100644 --- a/Samples/4_CUDA_Libraries/cuSolverRf/README.md +++ b/Samples/4_CUDA_Libraries/cuSolverRf/README.md @@ -10,7 +10,7 @@ Linear Algebra, CUSOLVER Library ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 
](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,17 +23,17 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html) -cuDoubleComplex, cuComplex, cuGet +cuGet, cuDoubleComplex, cuComplex ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaStreamDestroy, cudaFree, cudaStreamCreate, cudaDeviceSynchronize, cudaMalloc, cudaMemcpy +cudaMemcpy, cudaStreamDestroy, cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaStreamCreate ## Dependencies needed to build/run [CUSOLVER](../../../README.md#cusolver), [CUBLAS](../../../README.md#cublas), [CUSPARSE](../../../README.md#cusparse) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/4_CUDA_Libraries/cuSolverRf/cuSolverRf_vs2017.vcxproj b/Samples/4_CUDA_Libraries/cuSolverRf/cuSolverRf_vs2017.vcxproj index fc5a4d2f..bcc7a6a4 100644 --- a/Samples/4_CUDA_Libraries/cuSolverRf/cuSolverRf_vs2017.vcxproj +++ b/Samples/4_CUDA_Libraries/cuSolverRf/cuSolverRf_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/cuSolverRf.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -109,6 +109,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/cuSolverRf/cuSolverRf_vs2019.vcxproj b/Samples/4_CUDA_Libraries/cuSolverRf/cuSolverRf_vs2019.vcxproj index 02802989..16948fcd 100644 --- a/Samples/4_CUDA_Libraries/cuSolverRf/cuSolverRf_vs2019.vcxproj +++ b/Samples/4_CUDA_Libraries/cuSolverRf/cuSolverRf_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/cuSolverRf.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -105,6 +105,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/cuSolverRf/cuSolverRf_vs2022.vcxproj b/Samples/4_CUDA_Libraries/cuSolverRf/cuSolverRf_vs2022.vcxproj index 9cd7669f..6de3db9b 100644 --- a/Samples/4_CUDA_Libraries/cuSolverRf/cuSolverRf_vs2022.vcxproj +++ b/Samples/4_CUDA_Libraries/cuSolverRf/cuSolverRf_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ 
$(OutDir)/cuSolverRf.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -105,6 +105,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/Makefile b/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/Makefile index 59b0c3d0..04cdc6ea 100644 --- a/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/Makefile +++ b/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/Makefile @@ -283,9 +283,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/NsightEclipse.xml b/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/NsightEclipse.xml index cc3a57ad..f6b92a54 100644 --- a/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/NsightEclipse.xml @@ -7,9 +7,9 @@ cuComplex cudaStreamDestroy cudaFree - cudaStreamCreate cudaDeviceSynchronize cudaMalloc + cudaStreamCreate cudaMemcpyAsync @@ -68,6 +68,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/README.md b/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/README.md index 2ac87f31..25ce2865 100644 --- a/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/README.md +++ b/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/README.md @@ -10,7 +10,7 @@ Linear Algebra, CUSOLVER Library ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 
3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -26,14 +26,14 @@ x86_64, ppc64le, armv7l cuDoubleComplex, cuComplex ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaStreamDestroy, cudaFree, cudaStreamCreate, cudaDeviceSynchronize, cudaMalloc, cudaMemcpyAsync +cudaStreamDestroy, cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaStreamCreate, cudaMemcpyAsync ## Dependencies needed to build/run [CUSOLVER](../../../README.md#cusolver), [CUSPARSE](../../../README.md#cusparse) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver_vs2017.vcxproj b/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver_vs2017.vcxproj index 78512bbd..9978ec3e 100644 --- a/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver_vs2017.vcxproj +++ b/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/cuSolverSp_LinearSolver.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -109,6 +109,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver_vs2019.vcxproj b/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver_vs2019.vcxproj index fe619501..a7aef7dd 100644 --- a/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver_vs2019.vcxproj +++ b/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/cuSolverSp_LinearSolver.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -105,6 +105,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver_vs2022.vcxproj b/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver_vs2022.vcxproj index 0d22534d..d6a6ef40 100644 --- a/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver_vs2022.vcxproj +++ b/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/cuSolverSp_LinearSolver.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -105,6 +105,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/Makefile b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/Makefile index e80902e4..649da6f8 100644 --- a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/Makefile +++ b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/Makefile @@ -283,9 +283,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/NsightEclipse.xml b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/NsightEclipse.xml index 32bfa4d3..65fa8556 100644 --- 
a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/NsightEclipse.xml @@ -5,11 +5,11 @@ cuDoubleComplex cuComplex + cudaMemcpy cudaStreamDestroy cudaFree - cudaStreamCreate cudaMalloc - cudaMemcpy + cudaStreamCreate whole @@ -59,6 +59,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/README.md b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/README.md index a992f9ef..3cf4112f 100644 --- a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/README.md +++ b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/README.md @@ -10,7 +10,7 @@ Linear Algebra, CUSOLVER Library ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 
8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -26,14 +26,14 @@ x86_64, ppc64le, armv7l cuDoubleComplex, cuComplex ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaStreamDestroy, cudaFree, cudaStreamCreate, cudaMalloc, cudaMemcpy +cudaMemcpy, cudaStreamDestroy, cudaFree, cudaMalloc, cudaStreamCreate ## Dependencies needed to build/run [CUSOLVER](../../../README.md#cusolver), [CUSPARSE](../../../README.md#cusparse) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/cuSolverSp_LowlevelCholesky_vs2017.vcxproj b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/cuSolverSp_LowlevelCholesky_vs2017.vcxproj index d04553b8..d2c8031e 100644 --- a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/cuSolverSp_LowlevelCholesky_vs2017.vcxproj +++ b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/cuSolverSp_LowlevelCholesky_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/cuSolverSp_LowlevelCholesky.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -109,6 +109,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/cuSolverSp_LowlevelCholesky_vs2019.vcxproj 
b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/cuSolverSp_LowlevelCholesky_vs2019.vcxproj index 3b686a78..2703da3a 100644 --- a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/cuSolverSp_LowlevelCholesky_vs2019.vcxproj +++ b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/cuSolverSp_LowlevelCholesky_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/cuSolverSp_LowlevelCholesky.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -105,6 +105,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/cuSolverSp_LowlevelCholesky_vs2022.vcxproj b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/cuSolverSp_LowlevelCholesky_vs2022.vcxproj index 94e8d403..b85749bf 100644 --- a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/cuSolverSp_LowlevelCholesky_vs2022.vcxproj +++ b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/cuSolverSp_LowlevelCholesky_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/cuSolverSp_LowlevelCholesky.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -105,6 +105,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/Makefile b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/Makefile index 5e3384bc..2e7d1c9f 100644 --- 
a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/Makefile +++ b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/Makefile @@ -283,9 +283,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/NsightEclipse.xml b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/NsightEclipse.xml index 3e3dc5c4..51bab24a 100644 --- a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/NsightEclipse.xml @@ -3,14 +3,14 @@ cuSolverSp_LowlevelQR + cuGet cuDoubleComplex cuComplex - cuGet + cudaMemcpy cudaStreamDestroy cudaFree - cudaStreamCreate cudaMalloc - cudaMemcpy + cudaStreamCreate whole @@ -60,6 +60,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/README.md b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/README.md index eae84e4d..df5f2a84 100644 --- a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/README.md +++ b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/README.md @@ -10,7 +10,7 @@ Linear Algebra, CUSOLVER Library ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 
](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,17 +23,17 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html) -cuDoubleComplex, cuComplex, cuGet +cuGet, cuDoubleComplex, cuComplex ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaStreamDestroy, cudaFree, cudaStreamCreate, cudaMalloc, cudaMemcpy +cudaMemcpy, cudaStreamDestroy, cudaFree, cudaMalloc, cudaStreamCreate ## Dependencies needed to build/run [CUSOLVER](../../../README.md#cusolver), [CUSPARSE](../../../README.md#cusparse) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/cuSolverSp_LowlevelQR_vs2017.vcxproj b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/cuSolverSp_LowlevelQR_vs2017.vcxproj index b62c633e..b25aa37d 100644 --- a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/cuSolverSp_LowlevelQR_vs2017.vcxproj +++ b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/cuSolverSp_LowlevelQR_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/cuSolverSp_LowlevelQR.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -109,6 +109,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/cuSolverSp_LowlevelQR_vs2019.vcxproj b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/cuSolverSp_LowlevelQR_vs2019.vcxproj index 5ab45d7d..af967c45 100644 --- a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/cuSolverSp_LowlevelQR_vs2019.vcxproj +++ b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/cuSolverSp_LowlevelQR_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/cuSolverSp_LowlevelQR.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -105,6 +105,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/cuSolverSp_LowlevelQR_vs2022.vcxproj 
b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/cuSolverSp_LowlevelQR_vs2022.vcxproj index d8ee669f..bb98790f 100644 --- a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/cuSolverSp_LowlevelQR_vs2022.vcxproj +++ b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/cuSolverSp_LowlevelQR_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/cuSolverSp_LowlevelQR.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -105,6 +105,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/cudaNvSci/Makefile b/Samples/4_CUDA_Libraries/cudaNvSci/Makefile index dd8e7bce..f6561b1b 100644 --- a/Samples/4_CUDA_Libraries/cudaNvSci/Makefile +++ b/Samples/4_CUDA_Libraries/cudaNvSci/Makefile @@ -327,9 +327,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/4_CUDA_Libraries/cudaNvSci/NsightEclipse.xml b/Samples/4_CUDA_Libraries/cudaNvSci/NsightEclipse.xml index a8659d17..0995196e 100644 --- a/Samples/4_CUDA_Libraries/cudaNvSci/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/cudaNvSci/NsightEclipse.xml @@ -7,39 +7,39 @@ cuDeviceGetUuid - cudaGetMipmappedArrayLevel - cudaImportNvSciImage - cudaImportExternalSemaphore - cudaNvSciApp - cudaStreamCreateWithFlags - cudaExternalMemoryGetMappedMipmappedArray - cudaNvSciWait - cudaDestroyExternalMemory - cudaMemcpyAsync - cudaStreamDestroy - cudaSignalExternalSemaphoresAsync - cudaDeviceGetNvSciSyncAttributes - cudaFreeMipmappedArray - 
cudaMallocHost - cudaNvSci - cudaImportExternalMemory - cudaSetDevice - cudaImportNvSciRawBuf - cudaImportNvSciSemaphore - cudaGetDeviceCount - cudaDestroyTextureObject - cudaDeviceGetAttribute - cudaDestroyExternalSemaphore - cudaStreamSynchronize - cudaNvSciSignal - cudaFree - cudaDeviceId cudaExternalMemoryGetMappedBuffer + cudaImportExternalSemaphore + cudaDeviceGetAttribute + cudaNvSciSignal + cudaGetMipmappedArrayLevel + cudaImportNvSciRawBuf + cudaSetDevice + cudaImportNvSciImage + cudaNvSciApp + cudaDeviceId + cudaMallocHost + cudaSignalExternalSemaphoresAsync cudaCreateTextureObject cudaFreeHost + cudaNvSci + cudaNvSciWait + cudaGetDeviceCount + cudaMemcpyAsync + cudaStreamCreateWithFlags + cudaExternalMemoryGetMappedMipmappedArray + cudaStreamDestroy + cudaDeviceGetNvSciSyncAttributes + cudaDestroyTextureObject + cudaDestroyExternalMemory + cudaImportExternalMemory + cudaDestroyExternalSemaphore + cudaFreeMipmappedArray + cudaFree + cudaStreamSynchronize cudaWaitExternalSemaphoresAsync + cudaImportNvSciSemaphore - + whole ./ @@ -80,6 +80,7 @@ sm80 sm86 sm87 + sm90 aarch64 diff --git a/Samples/4_CUDA_Libraries/cudaNvSci/README.md b/Samples/4_CUDA_Libraries/cudaNvSci/README.md index baac35c6..2e12e227 100644 --- a/Samples/4_CUDA_Libraries/cudaNvSci/README.md +++ b/Samples/4_CUDA_Libraries/cudaNvSci/README.md @@ -2,7 +2,7 @@ ## Description -This sample demonstrates CUDA-NvSciBuf/NvSciSync Interop. Two CPU threads import the NvSciBuf and NvSciSync into CUDA to perform two image processing algorithms on a ppm image - image rotation in 1st thread &amp;amp;amp;amp;amp;amp;amp;amp;amp; rgba to grayscale conversion of rotated image in 2nd thread. Currently only supported on Ubuntu 18.04 +This sample demonstrates CUDA-NvSciBuf/NvSciSync Interop. 
Two CPU threads import the NvSciBuf and NvSciSync into CUDA to perform two image processing algorithms on a ppm image - image rotation in 1st thread & rgba to grayscale conversion of rotated image in 2nd thread. Currently only supported on Ubuntu 18.04 ## Key Concepts @@ -10,7 +10,7 @@ CUDA NvSci Interop, Data Parallel Algorithms, Image Processing ## Supported SM Architectures -[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -26,14 +26,14 @@ x86_64, aarch64 cuDeviceGetUuid ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaGetMipmappedArrayLevel, cudaImportNvSciImage, cudaImportExternalSemaphore, cudaNvSciApp, cudaStreamCreateWithFlags, cudaExternalMemoryGetMappedMipmappedArray, cudaNvSciWait, cudaDestroyExternalMemory, cudaMemcpyAsync, cudaStreamDestroy, cudaSignalExternalSemaphoresAsync, cudaDeviceGetNvSciSyncAttributes, cudaFreeMipmappedArray, cudaMallocHost, cudaNvSci, cudaImportExternalMemory, cudaSetDevice, cudaImportNvSciRawBuf, cudaImportNvSciSemaphore, cudaGetDeviceCount, cudaDestroyTextureObject, cudaDeviceGetAttribute, cudaDestroyExternalSemaphore, 
cudaStreamSynchronize, cudaNvSciSignal, cudaFree, cudaDeviceId, cudaExternalMemoryGetMappedBuffer, cudaCreateTextureObject, cudaFreeHost, cudaWaitExternalSemaphoresAsync +cudaExternalMemoryGetMappedBuffer, cudaImportExternalSemaphore, cudaDeviceGetAttribute, cudaNvSciSignal, cudaGetMipmappedArrayLevel, cudaImportNvSciRawBuf, cudaSetDevice, cudaImportNvSciImage, cudaNvSciApp, cudaDeviceId, cudaMallocHost, cudaSignalExternalSemaphoresAsync, cudaCreateTextureObject, cudaFreeHost, cudaNvSci, cudaNvSciWait, cudaGetDeviceCount, cudaMemcpyAsync, cudaStreamCreateWithFlags, cudaExternalMemoryGetMappedMipmappedArray, cudaStreamDestroy, cudaDeviceGetNvSciSyncAttributes, cudaDestroyTextureObject, cudaDestroyExternalMemory, cudaImportExternalMemory, cudaDestroyExternalSemaphore, cudaFreeMipmappedArray, cudaFree, cudaStreamSynchronize, cudaWaitExternalSemaphoresAsync, cudaImportNvSciSemaphore ## Dependencies needed to build/run [NVSCI](../../../README.md#nvsci) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/4_CUDA_Libraries/cudaNvSci/findnvsci.mk b/Samples/4_CUDA_Libraries/cudaNvSci/findnvsci.mk index 1934e138..0d6d157c 100644 --- a/Samples/4_CUDA_Libraries/cudaNvSci/findnvsci.mk +++ b/Samples/4_CUDA_Libraries/cudaNvSci/findnvsci.mk @@ -56,6 +56,7 @@ ifeq ("$(TARGET_OS)","linux") RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) 
ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -101,6 +102,11 @@ ifeq ("$(TARGET_OS)","linux") GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif + ifeq ("$(KYLIN)","0") + GLPATH ?= /usr/lib64/nvidia + GLLINK ?= -L/usr/lib64/nvidia + DFLT_PATH ?= /usr/lib64 + endif NVSCIBUFLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libnvscibuf.so -print 2>/dev/null) NVSCISYNCLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libnvscisync.so -print 2>/dev/null) diff --git a/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/Makefile b/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/Makefile index 390fb6f2..f9a79cde 100644 --- a/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/Makefile +++ b/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/Makefile @@ -338,9 +338,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/NsightEclipse.xml b/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/NsightEclipse.xml index 31ebd06c..0b3b44cb 100644 --- a/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/NsightEclipse.xml @@ -7,32 +7,32 @@ cuDeviceGetUuid - cudaGetMipmappedArrayLevel - cudaImportNvSciImage cudaImportExternalSemaphore + cudaGetMipmappedArrayLevel + cudaSetDevice + cudaDestroySurfaceObject + cudaCreateSurfaceObject + cudaImportNvSciImage + cudaCreateChannelDesc + cudaMallocHost + cudaSignalExternalSemaphoresAsync + cudaFreeHost + cudaMemcpyAsync cudaStreamCreateWithFlags cudaExternalMemoryGetMappedMipmappedArray - cudaDestroyExternalMemory - cudaMemcpyAsync - cudaStreamDestroy - cudaSignalExternalSemaphoresAsync - cudaDeviceGetNvSciSyncAttributes - cudaFreeMipmappedArray - cudaMallocHost - cudaDestroySurfaceObject 
- cudaImportExternalMemory - cudaMalloc - cudaSetDevice - cudaDestroyExternalSemaphore - cudaCreateChannelDesc - cudaStreamSynchronize - cudaFree - cudaFreeArray - cudaCreateSurfaceObject - cudaFreeHost cudaMallocArray - cudaWaitExternalSemaphoresAsync + cudaFreeArray + cudaStreamDestroy + cudaDeviceGetNvSciSyncAttributes + cudaDestroyExternalMemory + cudaImportExternalMemory + cudaDestroyExternalSemaphore + cudaFreeMipmappedArray cudaImportNvSciSync + cudaFree + cudaStreamSynchronize + cudaMalloc + cudaWaitExternalSemaphoresAsync whole @@ -77,6 +77,7 @@ sm80 sm86 sm87 + sm90 aarch64 diff --git a/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/README.md b/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/README.md index 2ef3f769..9d1cd136 100644 --- a/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/README.md +++ b/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/README.md @@ -10,7 +10,7 @@ CUDA NvSci Interop, Data Parallel Algorithms, Image Processing ## Supported SM Architectures -[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -26,14 +26,14 @@ aarch64 cuDeviceGetUuid ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaGetMipmappedArrayLevel, 
cudaImportNvSciImage, cudaImportExternalSemaphore, cudaStreamCreateWithFlags, cudaExternalMemoryGetMappedMipmappedArray, cudaDestroyExternalMemory, cudaMemcpyAsync, cudaStreamDestroy, cudaSignalExternalSemaphoresAsync, cudaDeviceGetNvSciSyncAttributes, cudaFreeMipmappedArray, cudaMallocHost, cudaDestroySurfaceObject, cudaImportExternalMemory, cudaMalloc, cudaSetDevice, cudaDestroyExternalSemaphore, cudaCreateChannelDesc, cudaStreamSynchronize, cudaFree, cudaFreeArray, cudaCreateSurfaceObject, cudaFreeHost, cudaMallocArray, cudaWaitExternalSemaphoresAsync, cudaImportNvSciSync +cudaImportExternalSemaphore, cudaGetMipmappedArrayLevel, cudaSetDevice, cudaDestroySurfaceObject, cudaCreateSurfaceObject, cudaImportNvSciImage, cudaCreateChannelDesc, cudaMallocHost, cudaSignalExternalSemaphoresAsync, cudaFreeHost, cudaMemcpyAsync, cudaStreamCreateWithFlags, cudaExternalMemoryGetMappedMipmappedArray, cudaMallocArray, cudaFreeArray, cudaStreamDestroy, cudaDeviceGetNvSciSyncAttributes, cudaDestroyExternalMemory, cudaImportExternalMemory, cudaDestroyExternalSemaphore, cudaFreeMipmappedArray, cudaImportNvSciSync, cudaFree, cudaStreamSynchronize, cudaMalloc, cudaWaitExternalSemaphoresAsync ## Dependencies needed to build/run [NVSCI](../../../README.md#nvsci), [NvMedia](../../../README.md#nvmedia) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/findnvmedia.mk b/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/findnvmedia.mk index 7b8cd1b6..23cfcd53 100644 --- a/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/findnvmedia.mk +++ b/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/findnvmedia.mk @@ -56,6 +56,7 @@ ifeq ("$(TARGET_OS)","linux") RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -101,6 +102,11 @@ ifeq ("$(TARGET_OS)","linux") GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif + ifeq ("$(KYLIN)","0") + GLPATH ?= /usr/lib64/nvidia + GLLINK ?= -L/usr/lib64/nvidia + DFLT_PATH ?= /usr/lib64 + endif NVMEDIALIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libnvmedia.so -print 2>/dev/null) diff --git a/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/findnvsci.mk b/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/findnvsci.mk index 1934e138..0d6d157c 100644 --- a/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/findnvsci.mk +++ b/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/findnvsci.mk @@ -56,6 +56,7 @@ ifeq ("$(TARGET_OS)","linux") RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) 
ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -101,6 +102,11 @@ ifeq ("$(TARGET_OS)","linux") GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif + ifeq ("$(KYLIN)","0") + GLPATH ?= /usr/lib64/nvidia + GLLINK ?= -L/usr/lib64/nvidia + DFLT_PATH ?= /usr/lib64 + endif NVSCIBUFLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libnvscibuf.so -print 2>/dev/null) NVSCISYNCLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libnvscisync.so -print 2>/dev/null) diff --git a/Samples/4_CUDA_Libraries/freeImageInteropNPP/NsightEclipse.xml b/Samples/4_CUDA_Libraries/freeImageInteropNPP/NsightEclipse.xml index 0296542c..33f80755 100644 --- a/Samples/4_CUDA_Libraries/freeImageInteropNPP/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/freeImageInteropNPP/NsightEclipse.xml @@ -3,11 +3,11 @@ freeImageInteropNPP - cudaGetDeviceCount - cudaDriverGetVersion - cudaDeviceInit - cudaSetDevice cudaRuntimeGetVersion + cudaSetDevice + cudaGetDeviceCount + cudaDeviceInit + cudaDriverGetVersion cudaGetDeviceProperties @@ -65,6 +65,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/4_CUDA_Libraries/freeImageInteropNPP/README.md b/Samples/4_CUDA_Libraries/freeImageInteropNPP/README.md index 20361fe8..96e072b6 100644 --- a/Samples/4_CUDA_Libraries/freeImageInteropNPP/README.md +++ b/Samples/4_CUDA_Libraries/freeImageInteropNPP/README.md @@ -10,7 +10,7 @@ Performance Strategies, Image Processing, NPP Library ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 
](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaGetDeviceCount, cudaDriverGetVersion, cudaDeviceInit, cudaSetDevice, cudaRuntimeGetVersion, cudaGetDeviceProperties +cudaRuntimeGetVersion, cudaSetDevice, cudaGetDeviceCount, cudaDeviceInit, cudaDriverGetVersion, cudaGetDeviceProperties ## Dependencies needed to build/run [FreeImage](../../../README.md#freeimage), [NPP](../../../README.md#npp) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/4_CUDA_Libraries/freeImageInteropNPP/freeImageInteropNPP_vs2017.vcxproj b/Samples/4_CUDA_Libraries/freeImageInteropNPP/freeImageInteropNPP_vs2017.vcxproj index e791cbe1..b81f5f26 100644 --- a/Samples/4_CUDA_Libraries/freeImageInteropNPP/freeImageInteropNPP_vs2017.vcxproj +++ b/Samples/4_CUDA_Libraries/freeImageInteropNPP/freeImageInteropNPP_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -117,6 +117,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/freeImageInteropNPP/freeImageInteropNPP_vs2019.vcxproj b/Samples/4_CUDA_Libraries/freeImageInteropNPP/freeImageInteropNPP_vs2019.vcxproj index d6b27ff1..03289595 100644 --- a/Samples/4_CUDA_Libraries/freeImageInteropNPP/freeImageInteropNPP_vs2019.vcxproj +++ b/Samples/4_CUDA_Libraries/freeImageInteropNPP/freeImageInteropNPP_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -113,6 +113,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/freeImageInteropNPP/freeImageInteropNPP_vs2022.vcxproj b/Samples/4_CUDA_Libraries/freeImageInteropNPP/freeImageInteropNPP_vs2022.vcxproj index 6b803978..624cfaa8 100644 --- a/Samples/4_CUDA_Libraries/freeImageInteropNPP/freeImageInteropNPP_vs2022.vcxproj +++ b/Samples/4_CUDA_Libraries/freeImageInteropNPP/freeImageInteropNPP_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -113,6 +113,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/histEqualizationNPP/NsightEclipse.xml b/Samples/4_CUDA_Libraries/histEqualizationNPP/NsightEclipse.xml index eadb5438..9b1554bb 100644 --- a/Samples/4_CUDA_Libraries/histEqualizationNPP/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/histEqualizationNPP/NsightEclipse.xml @@ -3,14 +3,14 @@ histEqualizationNPP - cudaFree - cudaGetDeviceCount - cudaDriverGetVersion - cudaMalloc - cudaDeviceInit - cudaSetDevice cudaRuntimeGetVersion cudaMemcpy + cudaFree + cudaSetDevice + cudaGetDeviceCount + cudaDeviceInit + cudaDriverGetVersion + cudaMalloc cudaGetDeviceProperties @@ -71,6 +71,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git 
a/Samples/4_CUDA_Libraries/histEqualizationNPP/README.md b/Samples/4_CUDA_Libraries/histEqualizationNPP/README.md index dff01a26..ecf77bcb 100644 --- a/Samples/4_CUDA_Libraries/histEqualizationNPP/README.md +++ b/Samples/4_CUDA_Libraries/histEqualizationNPP/README.md @@ -10,7 +10,7 @@ Image Processing, Performance Strategies, NPP Library ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaGetDeviceCount, cudaDriverGetVersion, cudaMalloc, cudaDeviceInit, cudaSetDevice, 
cudaRuntimeGetVersion, cudaMemcpy, cudaGetDeviceProperties +cudaRuntimeGetVersion, cudaMemcpy, cudaFree, cudaSetDevice, cudaGetDeviceCount, cudaDeviceInit, cudaDriverGetVersion, cudaMalloc, cudaGetDeviceProperties ## Dependencies needed to build/run [FreeImage](../../../README.md#freeimage), [NPP](../../../README.md#npp) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/4_CUDA_Libraries/histEqualizationNPP/histEqualizationNPP_vs2017.vcxproj b/Samples/4_CUDA_Libraries/histEqualizationNPP/histEqualizationNPP_vs2017.vcxproj index a97516a6..6eb23e16 100644 --- a/Samples/4_CUDA_Libraries/histEqualizationNPP/histEqualizationNPP_vs2017.vcxproj +++ b/Samples/4_CUDA_Libraries/histEqualizationNPP/histEqualizationNPP_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -117,6 +117,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/histEqualizationNPP/histEqualizationNPP_vs2019.vcxproj b/Samples/4_CUDA_Libraries/histEqualizationNPP/histEqualizationNPP_vs2019.vcxproj index 2a220a88..6dd57051 100644 --- a/Samples/4_CUDA_Libraries/histEqualizationNPP/histEqualizationNPP_vs2019.vcxproj +++ b/Samples/4_CUDA_Libraries/histEqualizationNPP/histEqualizationNPP_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -113,6 +113,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/histEqualizationNPP/histEqualizationNPP_vs2022.vcxproj b/Samples/4_CUDA_Libraries/histEqualizationNPP/histEqualizationNPP_vs2022.vcxproj index b1780d1d..3f16252a 100644 --- a/Samples/4_CUDA_Libraries/histEqualizationNPP/histEqualizationNPP_vs2022.vcxproj +++ b/Samples/4_CUDA_Libraries/histEqualizationNPP/histEqualizationNPP_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -113,6 +113,6 @@ - + diff --git 
a/Samples/4_CUDA_Libraries/lineOfSight/Makefile b/Samples/4_CUDA_Libraries/lineOfSight/Makefile index 55adba98..21b842fe 100644 --- a/Samples/4_CUDA_Libraries/lineOfSight/Makefile +++ b/Samples/4_CUDA_Libraries/lineOfSight/Makefile @@ -304,9 +304,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/4_CUDA_Libraries/lineOfSight/NsightEclipse.xml b/Samples/4_CUDA_Libraries/lineOfSight/NsightEclipse.xml index c97e235a..e279a4fc 100644 --- a/Samples/4_CUDA_Libraries/lineOfSight/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/lineOfSight/NsightEclipse.xml @@ -6,11 +6,11 @@ --std=c++14 + cudaCreateChannelDesc + cudaMallocArray cudaFreeArray cudaDeviceSynchronize cudaCreateTextureObject - cudaMallocArray - cudaCreateChannelDesc whole @@ -57,6 +57,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/4_CUDA_Libraries/lineOfSight/README.md b/Samples/4_CUDA_Libraries/lineOfSight/README.md index cbd95d40..e704d3e2 100644 --- a/Samples/4_CUDA_Libraries/lineOfSight/README.md +++ b/Samples/4_CUDA_Libraries/lineOfSight/README.md @@ -10,7 +10,7 @@ Thrust Library ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 
](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFreeArray, cudaDeviceSynchronize, cudaCreateTextureObject, cudaMallocArray, cudaCreateChannelDesc +cudaCreateChannelDesc, cudaMallocArray, cudaFreeArray, cudaDeviceSynchronize, cudaCreateTextureObject ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/4_CUDA_Libraries/lineOfSight/lineOfSight_vs2017.vcxproj b/Samples/4_CUDA_Libraries/lineOfSight/lineOfSight_vs2017.vcxproj index 0e4b882b..14b93fad 100644 --- a/Samples/4_CUDA_Libraries/lineOfSight/lineOfSight_vs2017.vcxproj +++ b/Samples/4_CUDA_Libraries/lineOfSight/lineOfSight_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/lineOfSight.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/lineOfSight/lineOfSight_vs2019.vcxproj b/Samples/4_CUDA_Libraries/lineOfSight/lineOfSight_vs2019.vcxproj index 937d7a1b..e2dc1bf9 100644 --- a/Samples/4_CUDA_Libraries/lineOfSight/lineOfSight_vs2019.vcxproj +++ b/Samples/4_CUDA_Libraries/lineOfSight/lineOfSight_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/lineOfSight.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/lineOfSight/lineOfSight_vs2022.vcxproj b/Samples/4_CUDA_Libraries/lineOfSight/lineOfSight_vs2022.vcxproj index 41f70879..2b15511d 100644 --- a/Samples/4_CUDA_Libraries/lineOfSight/lineOfSight_vs2022.vcxproj +++ b/Samples/4_CUDA_Libraries/lineOfSight/lineOfSight_vs2022.vcxproj @@ -34,7 
+34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/lineOfSight.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/matrixMulCUBLAS/Makefile b/Samples/4_CUDA_Libraries/matrixMulCUBLAS/Makefile index c5e76fb9..dc4bc8dd 100644 --- a/Samples/4_CUDA_Libraries/matrixMulCUBLAS/Makefile +++ b/Samples/4_CUDA_Libraries/matrixMulCUBLAS/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/4_CUDA_Libraries/matrixMulCUBLAS/NsightEclipse.xml b/Samples/4_CUDA_Libraries/matrixMulCUBLAS/NsightEclipse.xml index 6f7125d9..8c19d35a 100644 --- a/Samples/4_CUDA_Libraries/matrixMulCUBLAS/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/matrixMulCUBLAS/NsightEclipse.xml @@ -3,14 +3,14 @@ matrixMulCUBLAS - cudaFree - cudaEventRecord - cudaEventCreate - cudaEventElapsedTime - cudaEventSynchronize - cudaMalloc cudaMemcpy + cudaFree + cudaEventSynchronize + cudaEventRecord + cudaMalloc + cudaEventElapsedTime cudaGetDeviceProperties + cudaEventCreate whole @@ -57,6 +57,7 @@ sm80 sm86 sm87 + sm90 new/matrixMulCUBLAS.cpp diff --git a/Samples/4_CUDA_Libraries/matrixMulCUBLAS/README.md b/Samples/4_CUDA_Libraries/matrixMulCUBLAS/README.md index 1390e9eb..a5d68353 100644 --- a/Samples/4_CUDA_Libraries/matrixMulCUBLAS/README.md +++ b/Samples/4_CUDA_Libraries/matrixMulCUBLAS/README.md @@ -10,7 +10,7 @@ CUDA Runtime API, 
Performance Strategies, Linear Algebra, CUBLAS ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaEventRecord, cudaEventCreate, cudaEventElapsedTime, cudaEventSynchronize, cudaMalloc, cudaMemcpy, cudaGetDeviceProperties +cudaMemcpy, cudaFree, cudaEventSynchronize, cudaEventRecord, cudaMalloc, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate ## Dependencies needed to build/run [CUBLAS](../../../README.md#cublas) ## Prerequisites -Download and install the [CUDA Toolkit 
11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/4_CUDA_Libraries/matrixMulCUBLAS/matrixMulCUBLAS_vs2017.vcxproj b/Samples/4_CUDA_Libraries/matrixMulCUBLAS/matrixMulCUBLAS_vs2017.vcxproj index ccc83a66..7cf90b09 100644 --- a/Samples/4_CUDA_Libraries/matrixMulCUBLAS/matrixMulCUBLAS_vs2017.vcxproj +++ b/Samples/4_CUDA_Libraries/matrixMulCUBLAS/matrixMulCUBLAS_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/matrixMulCUBLAS.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/matrixMulCUBLAS/matrixMulCUBLAS_vs2019.vcxproj b/Samples/4_CUDA_Libraries/matrixMulCUBLAS/matrixMulCUBLAS_vs2019.vcxproj index 6f820ba1..1665d0fa 100644 --- a/Samples/4_CUDA_Libraries/matrixMulCUBLAS/matrixMulCUBLAS_vs2019.vcxproj +++ b/Samples/4_CUDA_Libraries/matrixMulCUBLAS/matrixMulCUBLAS_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/matrixMulCUBLAS.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 
./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/matrixMulCUBLAS/matrixMulCUBLAS_vs2022.vcxproj b/Samples/4_CUDA_Libraries/matrixMulCUBLAS/matrixMulCUBLAS_vs2022.vcxproj index 4b4d8722..e9257bfb 100644 --- a/Samples/4_CUDA_Libraries/matrixMulCUBLAS/matrixMulCUBLAS_vs2022.vcxproj +++ b/Samples/4_CUDA_Libraries/matrixMulCUBLAS/matrixMulCUBLAS_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/matrixMulCUBLAS.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/nvJPEG/NsightEclipse.xml b/Samples/4_CUDA_Libraries/nvJPEG/NsightEclipse.xml index 90114781..baba3f94 100644 --- a/Samples/4_CUDA_Libraries/nvJPEG/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/nvJPEG/NsightEclipse.xml @@ -3,18 +3,18 @@ nvJPEG + cudaHostAlloc + cudaStreamCreateWithFlags cudaStreamDestroy cudaFree - cudaEventRecord - cudaHostAlloc - cudaEventCreate - cudaEventElapsedTime cudaEventSynchronize + cudaEventRecord cudaFreeHost - cudaMalloc - cudaStreamCreateWithFlags cudaStreamSynchronize + cudaMalloc + cudaEventElapsedTime cudaGetDeviceProperties + cudaEventCreate whole @@ -58,6 +58,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/4_CUDA_Libraries/nvJPEG/README.md b/Samples/4_CUDA_Libraries/nvJPEG/README.md index e1d3f7fe..a54a46ca 100644 --- a/Samples/4_CUDA_Libraries/nvJPEG/README.md +++ b/Samples/4_CUDA_Libraries/nvJPEG/README.md @@ -10,7 +10,7 @@ Image Decoding, NVJPEG Library ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 
5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaStreamDestroy, cudaFree, cudaEventRecord, cudaHostAlloc, cudaEventCreate, cudaEventElapsedTime, cudaEventSynchronize, cudaFreeHost, cudaMalloc, cudaStreamCreateWithFlags, cudaStreamSynchronize, cudaGetDeviceProperties +cudaHostAlloc, cudaStreamCreateWithFlags, cudaStreamDestroy, cudaFree, cudaEventSynchronize, cudaEventRecord, cudaFreeHost, cudaStreamSynchronize, cudaMalloc, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate ## Dependencies needed to build/run [NVJPEG](../../../README.md#nvjpeg) ## Prerequisites -Download and install the [CUDA Toolkit 
11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/4_CUDA_Libraries/nvJPEG/nvJPEG_vs2017.vcxproj b/Samples/4_CUDA_Libraries/nvJPEG/nvJPEG_vs2017.vcxproj index 5abe980b..c5931b66 100644 --- a/Samples/4_CUDA_Libraries/nvJPEG/nvJPEG_vs2017.vcxproj +++ b/Samples/4_CUDA_Libraries/nvJPEG/nvJPEG_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -107,6 +107,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/nvJPEG/nvJPEG_vs2019.vcxproj b/Samples/4_CUDA_Libraries/nvJPEG/nvJPEG_vs2019.vcxproj index 1bae3470..05906aff 100644 --- a/Samples/4_CUDA_Libraries/nvJPEG/nvJPEG_vs2019.vcxproj +++ b/Samples/4_CUDA_Libraries/nvJPEG/nvJPEG_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/nvJPEG/nvJPEG_vs2022.vcxproj b/Samples/4_CUDA_Libraries/nvJPEG/nvJPEG_vs2022.vcxproj index 9b68e545..f861ff46 100644 --- a/Samples/4_CUDA_Libraries/nvJPEG/nvJPEG_vs2022.vcxproj +++ b/Samples/4_CUDA_Libraries/nvJPEG/nvJPEG_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/nvJPEG_encoder/NsightEclipse.xml b/Samples/4_CUDA_Libraries/nvJPEG_encoder/NsightEclipse.xml index 157b43cd..fa59430a 100644 --- a/Samples/4_CUDA_Libraries/nvJPEG_encoder/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/nvJPEG_encoder/NsightEclipse.xml @@ -4,14 +4,14 @@ nvJPEG_encoder cudaFree - cudaEventRecord - cudaEventCreate - cudaEventElapsedTime - cudaDeviceSynchronize - cudaEventSynchronize - cudaMalloc cudaGetErrorString + cudaEventSynchronize + cudaDeviceSynchronize + cudaEventRecord + cudaMalloc + cudaEventElapsedTime cudaGetDeviceProperties + cudaEventCreate whole @@ -55,6 +55,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git 
a/Samples/4_CUDA_Libraries/nvJPEG_encoder/README.md b/Samples/4_CUDA_Libraries/nvJPEG_encoder/README.md index d3170c4b..09e2227b 100644 --- a/Samples/4_CUDA_Libraries/nvJPEG_encoder/README.md +++ b/Samples/4_CUDA_Libraries/nvJPEG_encoder/README.md @@ -10,7 +10,7 @@ Image Encoding, NVJPEG Library ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaEventRecord, cudaEventCreate, cudaEventElapsedTime, cudaDeviceSynchronize, cudaEventSynchronize, cudaMalloc, cudaGetErrorString, 
cudaGetDeviceProperties +cudaFree, cudaGetErrorString, cudaEventSynchronize, cudaDeviceSynchronize, cudaEventRecord, cudaMalloc, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate ## Dependencies needed to build/run [NVJPEG](../../../README.md#nvjpeg) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/4_CUDA_Libraries/nvJPEG_encoder/nvJPEG_encoder_vs2017.vcxproj b/Samples/4_CUDA_Libraries/nvJPEG_encoder/nvJPEG_encoder_vs2017.vcxproj index bd7bdf90..22a09377 100644 --- a/Samples/4_CUDA_Libraries/nvJPEG_encoder/nvJPEG_encoder_vs2017.vcxproj +++ b/Samples/4_CUDA_Libraries/nvJPEG_encoder/nvJPEG_encoder_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -107,6 +107,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/nvJPEG_encoder/nvJPEG_encoder_vs2019.vcxproj b/Samples/4_CUDA_Libraries/nvJPEG_encoder/nvJPEG_encoder_vs2019.vcxproj index bf27a1ed..4e23250e 100644 --- a/Samples/4_CUDA_Libraries/nvJPEG_encoder/nvJPEG_encoder_vs2019.vcxproj +++ b/Samples/4_CUDA_Libraries/nvJPEG_encoder/nvJPEG_encoder_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/nvJPEG_encoder/nvJPEG_encoder_vs2022.vcxproj b/Samples/4_CUDA_Libraries/nvJPEG_encoder/nvJPEG_encoder_vs2022.vcxproj index fcef7dc3..735c9892 100644 --- a/Samples/4_CUDA_Libraries/nvJPEG_encoder/nvJPEG_encoder_vs2022.vcxproj +++ b/Samples/4_CUDA_Libraries/nvJPEG_encoder/nvJPEG_encoder_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/oceanFFT/Makefile b/Samples/4_CUDA_Libraries/oceanFFT/Makefile index 333096a4..eaa57172 100644 --- a/Samples/4_CUDA_Libraries/oceanFFT/Makefile +++ 
b/Samples/4_CUDA_Libraries/oceanFFT/Makefile @@ -299,9 +299,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/4_CUDA_Libraries/oceanFFT/NsightEclipse.xml b/Samples/4_CUDA_Libraries/oceanFFT/NsightEclipse.xml index 3c846efc..8ac28a1b 100644 --- a/Samples/4_CUDA_Libraries/oceanFFT/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/oceanFFT/NsightEclipse.xml @@ -3,17 +3,17 @@ oceanFFT - cudaFree - cudaGraphicsMapResources - cudaUpdateHeightmapKernel - cudaGraphicsGLRegisterBuffer - cudaCalculateSlopeKernel - cudaGraphicsResourceGetMappedPointer - cudaMalloc - cudaGenerateSpectrumKernel - cudaGraphicsUnregisterResource cudaGraphicsUnmapResources cudaMemcpy + cudaMalloc + cudaFree + cudaGraphicsResourceGetMappedPointer + cudaCalculateSlopeKernel + cudaGraphicsMapResources + cudaUpdateHeightmapKernel + cudaGraphicsUnregisterResource + cudaGenerateSpectrumKernel + cudaGraphicsGLRegisterBuffer cudaGetDeviceProperties @@ -85,6 +85,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/4_CUDA_Libraries/oceanFFT/README.md b/Samples/4_CUDA_Libraries/oceanFFT/README.md index e6f1b2a2..00f7aa57 100644 --- a/Samples/4_CUDA_Libraries/oceanFFT/README.md +++ b/Samples/4_CUDA_Libraries/oceanFFT/README.md @@ -10,7 +10,7 @@ Graphics Interop, Image Processing, CUFFT Library ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 
7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaGraphicsMapResources, cudaUpdateHeightmapKernel, cudaGraphicsGLRegisterBuffer, cudaCalculateSlopeKernel, cudaGraphicsResourceGetMappedPointer, cudaMalloc, cudaGenerateSpectrumKernel, cudaGraphicsUnregisterResource, cudaGraphicsUnmapResources, cudaMemcpy, cudaGetDeviceProperties +cudaGraphicsUnmapResources, cudaMemcpy, cudaMalloc, cudaFree, cudaGraphicsResourceGetMappedPointer, cudaCalculateSlopeKernel, cudaGraphicsMapResources, cudaUpdateHeightmapKernel, cudaGraphicsUnregisterResource, cudaGenerateSpectrumKernel, cudaGraphicsGLRegisterBuffer, cudaGetDeviceProperties ## Dependencies needed to build/run [X11](../../../README.md#x11), [GL](../../../README.md#gl), [CUFFT](../../../README.md#cufft) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/4_CUDA_Libraries/oceanFFT/findgllib.mk b/Samples/4_CUDA_Libraries/oceanFFT/findgllib.mk index f0a5c551..998fcf0f 100644 --- a/Samples/4_CUDA_Libraries/oceanFFT/findgllib.mk +++ b/Samples/4_CUDA_Libraries/oceanFFT/findgllib.mk @@ -53,11 +53,12 @@ endif ifeq ("$(TARGET_OS)","linux") # $(info) >> findgllib.mk -> LINUX path <<<) # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside - UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) - FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) - RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) - CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) + UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) + FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) + RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) + CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) 
ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux") DFLT_PATH ?= /usr/lib endif endif + ifeq ("$(SUSE)","0") GLPATH ?= /usr/X11R6/lib64 GLLINK ?= -L/usr/X11R6/lib64 DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(FEDORA)","0") + else GLPATH ?= /usr/lib64/nvidia GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif - ifeq ("$(RHEL)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(CENTOS)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - + # find libGL, libGLU GLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so -print 2>/dev/null) GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null) diff --git a/Samples/4_CUDA_Libraries/oceanFFT/oceanFFT_vs2017.vcxproj b/Samples/4_CUDA_Libraries/oceanFFT/oceanFFT_vs2017.vcxproj index ff470ed3..09d8130b 100644 --- a/Samples/4_CUDA_Libraries/oceanFFT/oceanFFT_vs2017.vcxproj +++ b/Samples/4_CUDA_Libraries/oceanFFT/oceanFFT_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/oceanFFT.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -118,6 +118,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/oceanFFT/oceanFFT_vs2019.vcxproj b/Samples/4_CUDA_Libraries/oceanFFT/oceanFFT_vs2019.vcxproj index 2283b05f..84a21720 100644 --- a/Samples/4_CUDA_Libraries/oceanFFT/oceanFFT_vs2019.vcxproj +++ b/Samples/4_CUDA_Libraries/oceanFFT/oceanFFT_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ 
$(OutDir)/oceanFFT.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -114,6 +114,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/oceanFFT/oceanFFT_vs2022.vcxproj b/Samples/4_CUDA_Libraries/oceanFFT/oceanFFT_vs2022.vcxproj index 5e71f639..9a86a338 100644 --- a/Samples/4_CUDA_Libraries/oceanFFT/oceanFFT_vs2022.vcxproj +++ b/Samples/4_CUDA_Libraries/oceanFFT/oceanFFT_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/oceanFFT.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -114,6 +114,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/randomFog/NsightEclipse.xml b/Samples/4_CUDA_Libraries/randomFog/NsightEclipse.xml index 71960c72..467d2ef0 100644 --- a/Samples/4_CUDA_Libraries/randomFog/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/randomFog/NsightEclipse.xml @@ -7,9 +7,9 @@ cudaMalloc - cudaFree - cudaMemcpy cudaGetErrorString + cudaMemcpy + cudaFree whole @@ -76,6 +76,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/4_CUDA_Libraries/randomFog/README.md b/Samples/4_CUDA_Libraries/randomFog/README.md index 9a477994..e101b5b6 100644 --- a/Samples/4_CUDA_Libraries/randomFog/README.md +++ b/Samples/4_CUDA_Libraries/randomFog/README.md @@ -10,7 +10,7 @@ This sample illustrates pseudo- and quasi- random 
numbers produced by CURAND. ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMalloc, cudaFree, cudaMemcpy, cudaGetErrorString +cudaMalloc, cudaGetErrorString, cudaMemcpy, cudaFree ## Dependencies needed to build/run [X11](../../../README.md#x11), [GL](../../../README.md#gl), [CURAND](../../../README.md#curand) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/4_CUDA_Libraries/randomFog/findgllib.mk b/Samples/4_CUDA_Libraries/randomFog/findgllib.mk index f0a5c551..998fcf0f 100644 --- a/Samples/4_CUDA_Libraries/randomFog/findgllib.mk +++ b/Samples/4_CUDA_Libraries/randomFog/findgllib.mk @@ -53,11 +53,12 @@ endif ifeq ("$(TARGET_OS)","linux") # $(info) >> findgllib.mk -> LINUX path <<<) # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside - UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) - FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) - RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) - CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) + UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) + FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) + RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) + CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) 
ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux") DFLT_PATH ?= /usr/lib endif endif + ifeq ("$(SUSE)","0") GLPATH ?= /usr/X11R6/lib64 GLLINK ?= -L/usr/X11R6/lib64 DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(FEDORA)","0") + else GLPATH ?= /usr/lib64/nvidia GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif - ifeq ("$(RHEL)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(CENTOS)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - + # find libGL, libGLU GLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so -print 2>/dev/null) GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null) diff --git a/Samples/4_CUDA_Libraries/randomFog/randomFog_vs2017.vcxproj b/Samples/4_CUDA_Libraries/randomFog/randomFog_vs2017.vcxproj index 6c354dee..4e8773d6 100644 --- a/Samples/4_CUDA_Libraries/randomFog/randomFog_vs2017.vcxproj +++ b/Samples/4_CUDA_Libraries/randomFog/randomFog_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -118,6 +118,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/randomFog/randomFog_vs2019.vcxproj b/Samples/4_CUDA_Libraries/randomFog/randomFog_vs2019.vcxproj index ec4d468c..52ae3ec8 100644 --- a/Samples/4_CUDA_Libraries/randomFog/randomFog_vs2019.vcxproj +++ b/Samples/4_CUDA_Libraries/randomFog/randomFog_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -114,6 +114,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/randomFog/randomFog_vs2022.vcxproj b/Samples/4_CUDA_Libraries/randomFog/randomFog_vs2022.vcxproj index 680161ec..126ba72f 100644 --- a/Samples/4_CUDA_Libraries/randomFog/randomFog_vs2022.vcxproj +++ b/Samples/4_CUDA_Libraries/randomFog/randomFog_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -114,6 +114,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/simpleCUBLAS/NsightEclipse.xml 
b/Samples/4_CUDA_Libraries/simpleCUBLAS/NsightEclipse.xml index 14436606..47e06575 100644 --- a/Samples/4_CUDA_Libraries/simpleCUBLAS/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/simpleCUBLAS/NsightEclipse.xml @@ -50,6 +50,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/4_CUDA_Libraries/simpleCUBLAS/README.md b/Samples/4_CUDA_Libraries/simpleCUBLAS/README.md index d398137e..dbb814b9 100644 --- a/Samples/4_CUDA_Libraries/simpleCUBLAS/README.md +++ b/Samples/4_CUDA_Libraries/simpleCUBLAS/README.md @@ -10,7 +10,7 @@ Image Processing, CUBLAS Library ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -30,7 +30,7 @@ cudaMalloc, 
cudaFree ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/4_CUDA_Libraries/simpleCUBLAS/simpleCUBLAS_vs2017.vcxproj b/Samples/4_CUDA_Libraries/simpleCUBLAS/simpleCUBLAS_vs2017.vcxproj index 569946e5..181e913b 100644 --- a/Samples/4_CUDA_Libraries/simpleCUBLAS/simpleCUBLAS_vs2017.vcxproj +++ b/Samples/4_CUDA_Libraries/simpleCUBLAS/simpleCUBLAS_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -107,6 +107,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/simpleCUBLAS/simpleCUBLAS_vs2019.vcxproj b/Samples/4_CUDA_Libraries/simpleCUBLAS/simpleCUBLAS_vs2019.vcxproj index 1870c7cc..f0994fdf 100644 --- a/Samples/4_CUDA_Libraries/simpleCUBLAS/simpleCUBLAS_vs2019.vcxproj +++ b/Samples/4_CUDA_Libraries/simpleCUBLAS/simpleCUBLAS_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/simpleCUBLAS/simpleCUBLAS_vs2022.vcxproj b/Samples/4_CUDA_Libraries/simpleCUBLAS/simpleCUBLAS_vs2022.vcxproj index 21e6dcf2..9640014c 100644 --- a/Samples/4_CUDA_Libraries/simpleCUBLAS/simpleCUBLAS_vs2022.vcxproj +++ b/Samples/4_CUDA_Libraries/simpleCUBLAS/simpleCUBLAS_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/simpleCUBLASXT/NsightEclipse.xml b/Samples/4_CUDA_Libraries/simpleCUBLASXT/NsightEclipse.xml index 9aa98823..47bd44a5 100644 --- a/Samples/4_CUDA_Libraries/simpleCUBLASXT/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/simpleCUBLASXT/NsightEclipse.xml @@ -3,9 +3,9 @@ simpleCUBLASXT - cudaFree - cudaGetDeviceCount cudaGetDeviceProperties + cudaGetDeviceCount + cudaFree whole @@ -50,6 +50,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git 
a/Samples/4_CUDA_Libraries/simpleCUBLASXT/README.md b/Samples/4_CUDA_Libraries/simpleCUBLASXT/README.md index 37d6d4e9..0d8c9695 100644 --- a/Samples/4_CUDA_Libraries/simpleCUBLASXT/README.md +++ b/Samples/4_CUDA_Libraries/simpleCUBLASXT/README.md @@ -10,7 +10,7 @@ CUBLAS-XT Library ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaGetDeviceCount, cudaGetDeviceProperties +cudaGetDeviceProperties, cudaGetDeviceCount, cudaFree ## Dependencies needed to build/run 
[CUBLAS](../../../README.md#cublas) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/4_CUDA_Libraries/simpleCUBLASXT/simpleCUBLASXT_vs2017.vcxproj b/Samples/4_CUDA_Libraries/simpleCUBLASXT/simpleCUBLASXT_vs2017.vcxproj index eec501a7..29702658 100644 --- a/Samples/4_CUDA_Libraries/simpleCUBLASXT/simpleCUBLASXT_vs2017.vcxproj +++ b/Samples/4_CUDA_Libraries/simpleCUBLASXT/simpleCUBLASXT_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -107,6 +107,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/simpleCUBLASXT/simpleCUBLASXT_vs2019.vcxproj b/Samples/4_CUDA_Libraries/simpleCUBLASXT/simpleCUBLASXT_vs2019.vcxproj index 06446054..32a4ace1 100644 --- a/Samples/4_CUDA_Libraries/simpleCUBLASXT/simpleCUBLASXT_vs2019.vcxproj +++ b/Samples/4_CUDA_Libraries/simpleCUBLASXT/simpleCUBLASXT_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/simpleCUBLASXT/simpleCUBLASXT_vs2022.vcxproj b/Samples/4_CUDA_Libraries/simpleCUBLASXT/simpleCUBLASXT_vs2022.vcxproj index 114ef777..c900da38 100644 --- a/Samples/4_CUDA_Libraries/simpleCUBLASXT/simpleCUBLASXT_vs2022.vcxproj +++ b/Samples/4_CUDA_Libraries/simpleCUBLASXT/simpleCUBLASXT_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/Makefile b/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/Makefile index 2a75e025..86638c5e 100644 --- a/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/Makefile +++ b/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/Makefile @@ -291,9 +291,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 
87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/NsightEclipse.xml b/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/NsightEclipse.xml index 2b64842d..3dcea4ef 100644 --- a/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/NsightEclipse.xml @@ -4,9 +4,9 @@ simpleCUBLAS_LU cudaGetErrorEnum - cudaFree cudaMalloc cudaMemcpy + cudaFree whole @@ -52,6 +52,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/README.md b/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/README.md index 94bdb4e8..2b1b93dc 100644 --- a/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/README.md +++ b/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/README.md @@ -10,7 +10,7 @@ CUBLAS Library, LU decomposition ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) 
[SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaGetErrorEnum, cudaFree, cudaMalloc, cudaMemcpy +cudaGetErrorEnum, cudaMalloc, cudaMemcpy, cudaFree ## Dependencies needed to build/run [CUBLAS](../../../README.md#cublas) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/simpleCUBLAS_LU_vs2017.vcxproj b/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/simpleCUBLAS_LU_vs2017.vcxproj index 9f13e4e8..bffe80d5 100644 --- a/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/simpleCUBLAS_LU_vs2017.vcxproj +++ b/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/simpleCUBLAS_LU_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleCUBLAS_LU.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/simpleCUBLAS_LU_vs2019.vcxproj b/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/simpleCUBLAS_LU_vs2019.vcxproj index 
9c1278ca..18c1b117 100644 --- a/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/simpleCUBLAS_LU_vs2019.vcxproj +++ b/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/simpleCUBLAS_LU_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleCUBLAS_LU.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/simpleCUBLAS_LU_vs2022.vcxproj b/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/simpleCUBLAS_LU_vs2022.vcxproj index 4e775803..2ff51879 100644 --- a/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/simpleCUBLAS_LU_vs2022.vcxproj +++ b/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/simpleCUBLAS_LU_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleCUBLAS_LU.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT/Makefile b/Samples/4_CUDA_Libraries/simpleCUFFT/Makefile index 3c37107d..080c25d2 100644 --- a/Samples/4_CUDA_Libraries/simpleCUFFT/Makefile +++ b/Samples/4_CUDA_Libraries/simpleCUFFT/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else 
-SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT/NsightEclipse.xml b/Samples/4_CUDA_Libraries/simpleCUFFT/NsightEclipse.xml index cd63fbb5..6ba60d80 100644 --- a/Samples/4_CUDA_Libraries/simpleCUFFT/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/simpleCUFFT/NsightEclipse.xml @@ -4,8 +4,8 @@ simpleCUFFT cudaMalloc - cudaFree cudaMemcpy + cudaFree whole @@ -49,6 +49,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT/README.md b/Samples/4_CUDA_Libraries/simpleCUFFT/README.md index 1d8cb404..e91252be 100644 --- a/Samples/4_CUDA_Libraries/simpleCUFFT/README.md +++ b/Samples/4_CUDA_Libraries/simpleCUFFT/README.md @@ -10,7 +10,7 @@ Image Processing, CUFFT Library ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 
](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l, aarch64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMalloc, cudaFree, cudaMemcpy +cudaMalloc, cudaMemcpy, cudaFree ## Dependencies needed to build/run [CUFFT](../../../README.md#cufft) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT/simpleCUFFT_vs2017.vcxproj b/Samples/4_CUDA_Libraries/simpleCUFFT/simpleCUFFT_vs2017.vcxproj index dadcc1f8..a6e80d87 100644 --- a/Samples/4_CUDA_Libraries/simpleCUFFT/simpleCUFFT_vs2017.vcxproj +++ b/Samples/4_CUDA_Libraries/simpleCUFFT/simpleCUFFT_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleCUFFT.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT/simpleCUFFT_vs2019.vcxproj b/Samples/4_CUDA_Libraries/simpleCUFFT/simpleCUFFT_vs2019.vcxproj index 4920b00d..5eb64892 100644 --- a/Samples/4_CUDA_Libraries/simpleCUFFT/simpleCUFFT_vs2019.vcxproj +++ 
b/Samples/4_CUDA_Libraries/simpleCUFFT/simpleCUFFT_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleCUFFT.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT/simpleCUFFT_vs2022.vcxproj b/Samples/4_CUDA_Libraries/simpleCUFFT/simpleCUFFT_vs2022.vcxproj index 0b3dd99a..2c598379 100644 --- a/Samples/4_CUDA_Libraries/simpleCUFFT/simpleCUFFT_vs2022.vcxproj +++ b/Samples/4_CUDA_Libraries/simpleCUFFT/simpleCUFFT_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleCUFFT.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/Makefile b/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/Makefile index 610b3cc9..c21a0c60 100644 --- a/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/Makefile +++ b/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git 
a/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/NsightEclipse.xml b/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/NsightEclipse.xml index 55dddd19..a22e53a0 100644 --- a/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/NsightEclipse.xml @@ -6,13 +6,13 @@ _USE_MATH_DEFINES - cudaFree cudaXtFree + cudaMemcpy + cudaFree + cudaSetDevice cudaGetDeviceCount cudaDeviceSynchronize cudaMalloc - cudaSetDevice - cudaMemcpy cudaGetDeviceProperties @@ -57,6 +57,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/README.md b/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/README.md index 0c941fbb..9cd1ad57 100644 --- a/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/README.md +++ b/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/README.md @@ -10,7 +10,7 @@ Image Processing, CUFFT Library ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 
](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaXtFree, cudaGetDeviceCount, cudaDeviceSynchronize, cudaMalloc, cudaSetDevice, cudaMemcpy, cudaGetDeviceProperties +cudaXtFree, cudaMemcpy, cudaFree, cudaSetDevice, cudaGetDeviceCount, cudaDeviceSynchronize, cudaMalloc, cudaGetDeviceProperties ## Dependencies needed to build/run [CUFFT](../../../README.md#cufft) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/simpleCUFFT_2d_MGPU_vs2017.vcxproj b/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/simpleCUFFT_2d_MGPU_vs2017.vcxproj index 219ca46b..836ea063 100644 --- a/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/simpleCUFFT_2d_MGPU_vs2017.vcxproj +++ b/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/simpleCUFFT_2d_MGPU_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleCUFFT_2d_MGPU.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/simpleCUFFT_2d_MGPU_vs2019.vcxproj b/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/simpleCUFFT_2d_MGPU_vs2019.vcxproj index d8d5f360..86780b70 100644 --- a/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/simpleCUFFT_2d_MGPU_vs2019.vcxproj +++ b/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/simpleCUFFT_2d_MGPU_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleCUFFT_2d_MGPU.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/simpleCUFFT_2d_MGPU_vs2022.vcxproj b/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/simpleCUFFT_2d_MGPU_vs2022.vcxproj index 
44c8dc9a..6e6b95b9 100644 --- a/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/simpleCUFFT_2d_MGPU_vs2022.vcxproj +++ b/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/simpleCUFFT_2d_MGPU_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleCUFFT_2d_MGPU.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/Makefile b/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/Makefile index 91eef96c..94cb18d7 100644 --- a/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/Makefile +++ b/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/NsightEclipse.xml b/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/NsightEclipse.xml index 5af2b802..f7274f8a 100644 --- a/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/NsightEclipse.xml @@ -3,10 +3,10 @@ simpleCUFFT_MGPU + cudaXtFree + cudaSetDevice cudaGetDeviceCount cudaDeviceSynchronize - cudaSetDevice - cudaXtFree cudaGetDeviceProperties @@ -51,6 +51,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/README.md b/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/README.md index 9d894764..bfb6e031 100644 --- a/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/README.md +++ 
b/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/README.md @@ -10,7 +10,7 @@ Image Processing, CUFFT Library ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaGetDeviceCount, cudaDeviceSynchronize, cudaSetDevice, cudaXtFree, cudaGetDeviceProperties +cudaXtFree, cudaSetDevice, cudaGetDeviceCount, cudaDeviceSynchronize, cudaGetDeviceProperties ## Dependencies needed to build/run [CUFFT](../../../README.md#cufft) ## Prerequisites -Download and install the [CUDA Toolkit 
11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/simpleCUFFT_MGPU_vs2017.vcxproj b/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/simpleCUFFT_MGPU_vs2017.vcxproj index 8b222ff9..c9da7911 100644 --- a/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/simpleCUFFT_MGPU_vs2017.vcxproj +++ b/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/simpleCUFFT_MGPU_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleCUFFT_MGPU.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/simpleCUFFT_MGPU_vs2019.vcxproj b/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/simpleCUFFT_MGPU_vs2019.vcxproj index 06f27404..694fc4fd 100644 --- a/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/simpleCUFFT_MGPU_vs2019.vcxproj +++ b/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/simpleCUFFT_MGPU_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleCUFFT_MGPU.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" 
--threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/simpleCUFFT_MGPU_vs2022.vcxproj b/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/simpleCUFFT_MGPU_vs2022.vcxproj index 8a2ca6a0..a2f9ad9c 100644 --- a/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/simpleCUFFT_MGPU_vs2022.vcxproj +++ b/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/simpleCUFFT_MGPU_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleCUFFT_MGPU.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_callback/Makefile b/Samples/4_CUDA_Libraries/simpleCUFFT_callback/Makefile index b9577d0d..c5159bed 100644 --- a/Samples/4_CUDA_Libraries/simpleCUFFT_callback/Makefile +++ b/Samples/4_CUDA_Libraries/simpleCUFFT_callback/Makefile @@ -316,9 +316,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 70 72 75 80 86 87 +SMS ?= 53 70 72 75 80 86 87 90 else -SMS ?= 35 50 60 70 75 80 86 +SMS ?= 35 50 60 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_callback/NsightEclipse.xml b/Samples/4_CUDA_Libraries/simpleCUFFT_callback/NsightEclipse.xml index ca2c23e6..eaa551ee 100644 --- a/Samples/4_CUDA_Libraries/simpleCUFFT_callback/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/simpleCUFFT_callback/NsightEclipse.xml @@ -7,12 +7,12 @@ -std=c++11 - cudaFree - cudaMalloc cudaMemcpy + cudaFree cudaMemcpyFromSymbol - cudaGetDeviceProperties cudaGetDevice + cudaMalloc + cudaGetDeviceProperties separate @@ -60,6 +60,7 @@ sm80 sm86 
sm87 + sm90 x86_64 diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_callback/README.md b/Samples/4_CUDA_Libraries/simpleCUFFT_callback/README.md index 29938002..71cd8ad1 100644 --- a/Samples/4_CUDA_Libraries/simpleCUFFT_callback/README.md +++ b/Samples/4_CUDA_Libraries/simpleCUFFT_callback/README.md @@ -10,7 +10,7 @@ Image Processing, CUFFT Library ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaMalloc, cudaMemcpy, cudaMemcpyFromSymbol, cudaGetDeviceProperties, cudaGetDevice +cudaMemcpy, cudaFree, cudaMemcpyFromSymbol, cudaGetDevice, cudaMalloc, cudaGetDeviceProperties ## Dependencies needed to build/run [callback](../../../README.md#callback), 
[CUFFT](../../../README.md#cufft) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/4_CUDA_Libraries/watershedSegmentationNPP/NsightEclipse.xml b/Samples/4_CUDA_Libraries/watershedSegmentationNPP/NsightEclipse.xml index 3d351a11..b16c392f 100644 --- a/Samples/4_CUDA_Libraries/watershedSegmentationNPP/NsightEclipse.xml +++ b/Samples/4_CUDA_Libraries/watershedSegmentationNPP/NsightEclipse.xml @@ -3,14 +3,14 @@ watershedSegmentationNPP + cudaRuntimeGetVersion cudaFree cudaDeviceGetAttribute cudaDriverGetVersion + cudaGetDevice cudaStreamGetFlags cudaStreamSynchronize - cudaRuntimeGetVersion cudaGetDeviceProperties - cudaGetDevice whole @@ -62,6 +62,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/4_CUDA_Libraries/watershedSegmentationNPP/README.md b/Samples/4_CUDA_Libraries/watershedSegmentationNPP/README.md index c06333e2..540e7443 100644 --- a/Samples/4_CUDA_Libraries/watershedSegmentationNPP/README.md +++ b/Samples/4_CUDA_Libraries/watershedSegmentationNPP/README.md @@ -10,7 +10,7 @@ Performance Strategies, Image Processing, NPP Library ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 
](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaDeviceGetAttribute, cudaDriverGetVersion, cudaStreamGetFlags, cudaStreamSynchronize, cudaRuntimeGetVersion, cudaGetDeviceProperties, cudaGetDevice +cudaRuntimeGetVersion, cudaFree, cudaDeviceGetAttribute, cudaDriverGetVersion, cudaGetDevice, cudaStreamGetFlags, cudaStreamSynchronize, cudaGetDeviceProperties ## Dependencies needed to build/run [NPP](../../../README.md#npp) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/4_CUDA_Libraries/watershedSegmentationNPP/watershedSegmentationNPP_vs2017.vcxproj b/Samples/4_CUDA_Libraries/watershedSegmentationNPP/watershedSegmentationNPP_vs2017.vcxproj index 6df4766b..c752f135 100644 --- a/Samples/4_CUDA_Libraries/watershedSegmentationNPP/watershedSegmentationNPP_vs2017.vcxproj +++ b/Samples/4_CUDA_Libraries/watershedSegmentationNPP/watershedSegmentationNPP_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -107,6 +107,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/watershedSegmentationNPP/watershedSegmentationNPP_vs2019.vcxproj b/Samples/4_CUDA_Libraries/watershedSegmentationNPP/watershedSegmentationNPP_vs2019.vcxproj index 494b75bc..10f8fef8 100644 --- a/Samples/4_CUDA_Libraries/watershedSegmentationNPP/watershedSegmentationNPP_vs2019.vcxproj +++ b/Samples/4_CUDA_Libraries/watershedSegmentationNPP/watershedSegmentationNPP_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -103,6 +103,6 @@ - + diff --git a/Samples/4_CUDA_Libraries/watershedSegmentationNPP/watershedSegmentationNPP_vs2022.vcxproj b/Samples/4_CUDA_Libraries/watershedSegmentationNPP/watershedSegmentationNPP_vs2022.vcxproj index e4e32e49..d960f01c 100644 --- a/Samples/4_CUDA_Libraries/watershedSegmentationNPP/watershedSegmentationNPP_vs2022.vcxproj +++ b/Samples/4_CUDA_Libraries/watershedSegmentationNPP/watershedSegmentationNPP_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -103,6 +103,6 @@ - + diff --git a/Samples/5_Domain_Specific/BlackScholes/BlackScholes_vs2017.vcxproj b/Samples/5_Domain_Specific/BlackScholes/BlackScholes_vs2017.vcxproj index 625c2f57..c63b2d70 100644 --- a/Samples/5_Domain_Specific/BlackScholes/BlackScholes_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/BlackScholes/BlackScholes_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/BlackScholes.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -108,6 +108,6 @@ - + diff --git a/Samples/5_Domain_Specific/BlackScholes/BlackScholes_vs2019.vcxproj b/Samples/5_Domain_Specific/BlackScholes/BlackScholes_vs2019.vcxproj index d872e998..adf0db2d 100644 --- a/Samples/5_Domain_Specific/BlackScholes/BlackScholes_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/BlackScholes/BlackScholes_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/BlackScholes.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/5_Domain_Specific/BlackScholes/BlackScholes_vs2022.vcxproj b/Samples/5_Domain_Specific/BlackScholes/BlackScholes_vs2022.vcxproj index 69831822..aa30adea 100644 --- a/Samples/5_Domain_Specific/BlackScholes/BlackScholes_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/BlackScholes/BlackScholes_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/BlackScholes.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git 
a/Samples/5_Domain_Specific/BlackScholes/Makefile b/Samples/5_Domain_Specific/BlackScholes/Makefile index b7d3c529..7935e540 100644 --- a/Samples/5_Domain_Specific/BlackScholes/Makefile +++ b/Samples/5_Domain_Specific/BlackScholes/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/5_Domain_Specific/BlackScholes/NsightEclipse.xml b/Samples/5_Domain_Specific/BlackScholes/NsightEclipse.xml index ab1f601d..8af9aa6d 100644 --- a/Samples/5_Domain_Specific/BlackScholes/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/BlackScholes/NsightEclipse.xml @@ -7,9 +7,9 @@ cudaMalloc - cudaFree cudaDeviceSynchronize cudaMemcpy + cudaFree whole @@ -50,6 +50,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/5_Domain_Specific/BlackScholes/README.md b/Samples/5_Domain_Specific/BlackScholes/README.md index 858b17e6..2d535490 100644 --- a/Samples/5_Domain_Specific/BlackScholes/README.md +++ b/Samples/5_Domain_Specific/BlackScholes/README.md @@ -10,7 +10,7 @@ Computational Finance ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 
](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMalloc, cudaFree, cudaDeviceSynchronize, cudaMemcpy +cudaMalloc, cudaDeviceSynchronize, cudaMemcpy, cudaFree ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/5_Domain_Specific/BlackScholes_nvrtc/BlackScholes_nvrtc_vs2017.vcxproj b/Samples/5_Domain_Specific/BlackScholes_nvrtc/BlackScholes_nvrtc_vs2017.vcxproj index 01556903..0432f89e 100644 --- a/Samples/5_Domain_Specific/BlackScholes_nvrtc/BlackScholes_nvrtc_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/BlackScholes_nvrtc/BlackScholes_nvrtc_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -108,6 +108,6 @@ - + diff --git a/Samples/5_Domain_Specific/BlackScholes_nvrtc/BlackScholes_nvrtc_vs2019.vcxproj b/Samples/5_Domain_Specific/BlackScholes_nvrtc/BlackScholes_nvrtc_vs2019.vcxproj index 309579cc..c97e0a3c 100644 --- a/Samples/5_Domain_Specific/BlackScholes_nvrtc/BlackScholes_nvrtc_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/BlackScholes_nvrtc/BlackScholes_nvrtc_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -104,6 +104,6 @@ - + diff --git a/Samples/5_Domain_Specific/BlackScholes_nvrtc/BlackScholes_nvrtc_vs2022.vcxproj b/Samples/5_Domain_Specific/BlackScholes_nvrtc/BlackScholes_nvrtc_vs2022.vcxproj index d65aea1d..3796da00 100644 --- a/Samples/5_Domain_Specific/BlackScholes_nvrtc/BlackScholes_nvrtc_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/BlackScholes_nvrtc/BlackScholes_nvrtc_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -104,6 +104,6 @@ - + diff --git a/Samples/5_Domain_Specific/BlackScholes_nvrtc/README.md b/Samples/5_Domain_Specific/BlackScholes_nvrtc/README.md index b0f95581..a0e4aa67 100644 --- a/Samples/5_Domain_Specific/BlackScholes_nvrtc/README.md +++ b/Samples/5_Domain_Specific/BlackScholes_nvrtc/README.md @@ -10,7 +10,7 @@ Computational Finance, Runtime Compilation ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 
](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,7 +23,7 @@ x86_64, ppc64le, aarch64 ## CUDA APIs involved ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html) -cuModuleGetFunction, cuMemAlloc, cuLaunchKernel, cuCtxSynchronize, cuMemFree, cuMemcpyDtoH, cuMemcpyHtoD +cuMemcpyDtoH, cuLaunchKernel, cuMemcpyHtoD, cuCtxSynchronize, cuMemAlloc, cuMemFree, cuModuleGetFunction ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) cudaBlockSize, cudaGridSize @@ -33,7 +33,7 @@ cudaBlockSize, cudaGridSize ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/5_Domain_Specific/FDTD3d/FDTD3d_vs2017.vcxproj b/Samples/5_Domain_Specific/FDTD3d/FDTD3d_vs2017.vcxproj index 1227bc24..09368fc9 100644 --- a/Samples/5_Domain_Specific/FDTD3d/FDTD3d_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/FDTD3d/FDTD3d_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/FDTD3d.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -112,6 +112,6 @@ - + diff --git a/Samples/5_Domain_Specific/FDTD3d/FDTD3d_vs2019.vcxproj b/Samples/5_Domain_Specific/FDTD3d/FDTD3d_vs2019.vcxproj index 10f2dc23..09c34005 100644 --- a/Samples/5_Domain_Specific/FDTD3d/FDTD3d_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/FDTD3d/FDTD3d_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/FDTD3d.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -108,6 +108,6 @@ - + diff --git a/Samples/5_Domain_Specific/FDTD3d/FDTD3d_vs2022.vcxproj b/Samples/5_Domain_Specific/FDTD3d/FDTD3d_vs2022.vcxproj index 8f01460a..6af53b8b 100644 --- a/Samples/5_Domain_Specific/FDTD3d/FDTD3d_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/FDTD3d/FDTD3d_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/FDTD3d.exe - 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -108,6 +108,6 @@ - + diff --git a/Samples/5_Domain_Specific/FDTD3d/Makefile b/Samples/5_Domain_Specific/FDTD3d/Makefile index 9f41e1d1..bbbee3e3 100644 --- a/Samples/5_Domain_Specific/FDTD3d/Makefile +++ b/Samples/5_Domain_Specific/FDTD3d/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/5_Domain_Specific/FDTD3d/NsightEclipse.xml b/Samples/5_Domain_Specific/FDTD3d/NsightEclipse.xml index 68eaef75..7528550b 100644 --- a/Samples/5_Domain_Specific/FDTD3d/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/FDTD3d/NsightEclipse.xml @@ -3,19 +3,19 @@ FDTD3d - cudaMemcpyToSymbol - cudaFree - cudaEventRecord - cudaEventCreate - cudaFuncGetAttributes - cudaGetDeviceCount - cudaEventElapsedTime - cudaDeviceSynchronize - cudaMalloc - cudaEventDestroy - cudaSetDevice cudaMemcpy + cudaMalloc + cudaFree + cudaFuncGetAttributes + cudaSetDevice cudaGetDeviceProperties + cudaDeviceSynchronize + cudaEventRecord + cudaMemcpyToSymbol + cudaEventDestroy + cudaEventElapsedTime + cudaGetDeviceCount + cudaEventCreate whole @@ -60,6 +60,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/5_Domain_Specific/FDTD3d/README.md b/Samples/5_Domain_Specific/FDTD3d/README.md index c884443d..80774398 100644 --- a/Samples/5_Domain_Specific/FDTD3d/README.md +++ b/Samples/5_Domain_Specific/FDTD3d/README.md @@ -10,7 
+10,7 @@ Performance Strategies ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemcpyToSymbol, cudaFree, cudaEventRecord, cudaEventCreate, cudaFuncGetAttributes, cudaGetDeviceCount, cudaEventElapsedTime, cudaDeviceSynchronize, cudaMalloc, cudaEventDestroy, cudaSetDevice, cudaMemcpy, cudaGetDeviceProperties +cudaMemcpy, cudaMalloc, cudaFree, cudaFuncGetAttributes, cudaSetDevice, cudaGetDeviceProperties, cudaDeviceSynchronize, cudaEventRecord, cudaMemcpyToSymbol, cudaEventDestroy, 
cudaEventElapsedTime, cudaGetDeviceCount, cudaEventCreate ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/5_Domain_Specific/HSOpticalFlow/FlowCPU.flo b/Samples/5_Domain_Specific/HSOpticalFlow/FlowCPU.flo new file mode 100644 index 00000000..d92d4549 Binary files /dev/null and b/Samples/5_Domain_Specific/HSOpticalFlow/FlowCPU.flo differ diff --git a/Samples/5_Domain_Specific/HSOpticalFlow/FlowGPU.flo b/Samples/5_Domain_Specific/HSOpticalFlow/FlowGPU.flo new file mode 100644 index 00000000..45e8df6f Binary files /dev/null and b/Samples/5_Domain_Specific/HSOpticalFlow/FlowGPU.flo differ diff --git a/Samples/5_Domain_Specific/HSOpticalFlow/HSOpticalFlow_vs2017.vcxproj b/Samples/5_Domain_Specific/HSOpticalFlow/HSOpticalFlow_vs2017.vcxproj index 26fe77dc..28f83ead 100644 --- a/Samples/5_Domain_Specific/HSOpticalFlow/HSOpticalFlow_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/HSOpticalFlow/HSOpticalFlow_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/HSOpticalFlow.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -117,6 +117,6 @@ - + diff --git a/Samples/5_Domain_Specific/HSOpticalFlow/HSOpticalFlow_vs2019.vcxproj b/Samples/5_Domain_Specific/HSOpticalFlow/HSOpticalFlow_vs2019.vcxproj index 563a57be..f9a6290c 100644 --- a/Samples/5_Domain_Specific/HSOpticalFlow/HSOpticalFlow_vs2019.vcxproj +++ 
b/Samples/5_Domain_Specific/HSOpticalFlow/HSOpticalFlow_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/HSOpticalFlow.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -113,6 +113,6 @@ - + diff --git a/Samples/5_Domain_Specific/HSOpticalFlow/HSOpticalFlow_vs2022.vcxproj b/Samples/5_Domain_Specific/HSOpticalFlow/HSOpticalFlow_vs2022.vcxproj index 241a3653..fe372f4e 100644 --- a/Samples/5_Domain_Specific/HSOpticalFlow/HSOpticalFlow_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/HSOpticalFlow/HSOpticalFlow_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/HSOpticalFlow.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -113,6 +113,6 @@ - + diff --git a/Samples/5_Domain_Specific/HSOpticalFlow/Makefile b/Samples/5_Domain_Specific/HSOpticalFlow/Makefile index f6f1a087..98ce7ac6 100644 --- a/Samples/5_Domain_Specific/HSOpticalFlow/Makefile +++ b/Samples/5_Domain_Specific/HSOpticalFlow/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git 
a/Samples/5_Domain_Specific/HSOpticalFlow/NsightEclipse.xml b/Samples/5_Domain_Specific/HSOpticalFlow/NsightEclipse.xml index 3de542e3..548a90c3 100644 --- a/Samples/5_Domain_Specific/HSOpticalFlow/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/HSOpticalFlow/NsightEclipse.xml @@ -4,9 +4,9 @@ HSOpticalFlow cudaMalloc + cudaMemcpy cudaMemset cudaFree - cudaMemcpy whole @@ -52,6 +52,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/5_Domain_Specific/HSOpticalFlow/README.md b/Samples/5_Domain_Specific/HSOpticalFlow/README.md index 363d7f17..195c8954 100644 --- a/Samples/5_Domain_Specific/HSOpticalFlow/README.md +++ b/Samples/5_Domain_Specific/HSOpticalFlow/README.md @@ -10,7 +10,7 @@ Image Processing, Data Parallel Algorithms ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 
](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMalloc, cudaMemset, cudaFree, cudaMemcpy +cudaMalloc, cudaMemcpy, cudaMemset, cudaFree ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/5_Domain_Specific/Mandelbrot/Makefile b/Samples/5_Domain_Specific/Mandelbrot/Makefile index 3daf6eea..777aa73c 100644 --- a/Samples/5_Domain_Specific/Mandelbrot/Makefile +++ b/Samples/5_Domain_Specific/Mandelbrot/Makefile @@ -299,9 +299,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/5_Domain_Specific/Mandelbrot/Mandelbrot_vs2017.vcxproj b/Samples/5_Domain_Specific/Mandelbrot/Mandelbrot_vs2017.vcxproj index ef345fe8..4d840508 100644 --- a/Samples/5_Domain_Specific/Mandelbrot/Mandelbrot_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/Mandelbrot/Mandelbrot_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/Mandelbrot.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common 
WIN32 @@ -121,6 +121,6 @@ - + diff --git a/Samples/5_Domain_Specific/Mandelbrot/Mandelbrot_vs2019.vcxproj b/Samples/5_Domain_Specific/Mandelbrot/Mandelbrot_vs2019.vcxproj index 64598393..f5774588 100644 --- a/Samples/5_Domain_Specific/Mandelbrot/Mandelbrot_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/Mandelbrot/Mandelbrot_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/Mandelbrot.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -117,6 +117,6 @@ - + diff --git a/Samples/5_Domain_Specific/Mandelbrot/Mandelbrot_vs2022.vcxproj b/Samples/5_Domain_Specific/Mandelbrot/Mandelbrot_vs2022.vcxproj index 786f8178..ce2b9586 100644 --- a/Samples/5_Domain_Specific/Mandelbrot/Mandelbrot_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/Mandelbrot/Mandelbrot_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/Mandelbrot.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -117,6 +117,6 @@ - + diff --git a/Samples/5_Domain_Specific/Mandelbrot/NsightEclipse.xml b/Samples/5_Domain_Specific/Mandelbrot/NsightEclipse.xml index 00d22c18..d74f3c4f 100644 --- a/Samples/5_Domain_Specific/Mandelbrot/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/Mandelbrot/NsightEclipse.xml @@ -3,19 +3,19 @@ Mandelbrot - cudaFree - 
cudaGraphicsMapResources - cudaGraphicsUnregisterResource - cudaGraphicsGLRegisterBuffer - cudaGLUnregisterBufferObject - cudaGraphicsResourceGetMappedPointer cudaGLUnmapBufferObject - cudaDeviceSynchronize - cudaGLMapBufferObject - cudaMalloc - cudaGLRegisterBufferObject cudaGraphicsUnmapResources cudaMemcpy + cudaFree + cudaGLMapBufferObject + cudaGraphicsResourceGetMappedPointer + cudaGraphicsMapResources + cudaGLUnregisterBufferObject + cudaDeviceSynchronize + cudaGLRegisterBufferObject + cudaGraphicsUnregisterResource + cudaMalloc + cudaGraphicsGLRegisterBuffer cudaGetDeviceProperties @@ -79,6 +79,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/5_Domain_Specific/Mandelbrot/README.md b/Samples/5_Domain_Specific/Mandelbrot/README.md index 57131463..a09cfeed 100644 --- a/Samples/5_Domain_Specific/Mandelbrot/README.md +++ b/Samples/5_Domain_Specific/Mandelbrot/README.md @@ -10,7 +10,7 @@ Graphics Interop, Data Parallel Algorithms ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 
](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaGraphicsMapResources, cudaGraphicsUnregisterResource, cudaGraphicsGLRegisterBuffer, cudaGLUnregisterBufferObject, cudaGraphicsResourceGetMappedPointer, cudaGLUnmapBufferObject, cudaDeviceSynchronize, cudaGLMapBufferObject, cudaMalloc, cudaGLRegisterBufferObject, cudaGraphicsUnmapResources, cudaMemcpy, cudaGetDeviceProperties +cudaGLUnmapBufferObject, cudaGraphicsUnmapResources, cudaMemcpy, cudaFree, cudaGLMapBufferObject, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaGLUnregisterBufferObject, cudaDeviceSynchronize, cudaGLRegisterBufferObject, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer, cudaGetDeviceProperties ## Dependencies needed to build/run [X11](../../../README.md#x11), [GL](../../../README.md#gl) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/5_Domain_Specific/Mandelbrot/findgllib.mk b/Samples/5_Domain_Specific/Mandelbrot/findgllib.mk index f0a5c551..998fcf0f 100644 --- a/Samples/5_Domain_Specific/Mandelbrot/findgllib.mk +++ b/Samples/5_Domain_Specific/Mandelbrot/findgllib.mk @@ -53,11 +53,12 @@ endif ifeq ("$(TARGET_OS)","linux") # $(info) >> findgllib.mk -> LINUX path <<<) # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside - UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) - FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) - RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) - CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) + UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) + FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) + RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) + CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) 
ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux") DFLT_PATH ?= /usr/lib endif endif + ifeq ("$(SUSE)","0") GLPATH ?= /usr/X11R6/lib64 GLLINK ?= -L/usr/X11R6/lib64 DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(FEDORA)","0") + else GLPATH ?= /usr/lib64/nvidia GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif - ifeq ("$(RHEL)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(CENTOS)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - + # find libGL, libGLU GLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so -print 2>/dev/null) GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null) diff --git a/Samples/5_Domain_Specific/MonteCarloMultiGPU/Makefile b/Samples/5_Domain_Specific/MonteCarloMultiGPU/Makefile index 503c9678..c38f7e44 100644 --- a/Samples/5_Domain_Specific/MonteCarloMultiGPU/Makefile +++ b/Samples/5_Domain_Specific/MonteCarloMultiGPU/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/5_Domain_Specific/MonteCarloMultiGPU/MonteCarloMultiGPU_vs2017.vcxproj b/Samples/5_Domain_Specific/MonteCarloMultiGPU/MonteCarloMultiGPU_vs2017.vcxproj index f37ca9a9..3330def6 100644 --- a/Samples/5_Domain_Specific/MonteCarloMultiGPU/MonteCarloMultiGPU_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/MonteCarloMultiGPU/MonteCarloMultiGPU_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/MonteCarloMultiGPU.exe - 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -113,6 +113,6 @@ - + diff --git a/Samples/5_Domain_Specific/MonteCarloMultiGPU/MonteCarloMultiGPU_vs2019.vcxproj b/Samples/5_Domain_Specific/MonteCarloMultiGPU/MonteCarloMultiGPU_vs2019.vcxproj index 9c88c3f3..cf2d4ad2 100644 --- a/Samples/5_Domain_Specific/MonteCarloMultiGPU/MonteCarloMultiGPU_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/MonteCarloMultiGPU/MonteCarloMultiGPU_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/MonteCarloMultiGPU.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -109,6 +109,6 @@ - + diff --git a/Samples/5_Domain_Specific/MonteCarloMultiGPU/MonteCarloMultiGPU_vs2022.vcxproj b/Samples/5_Domain_Specific/MonteCarloMultiGPU/MonteCarloMultiGPU_vs2022.vcxproj index a047f1c0..852394d8 100644 --- a/Samples/5_Domain_Specific/MonteCarloMultiGPU/MonteCarloMultiGPU_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/MonteCarloMultiGPU/MonteCarloMultiGPU_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/MonteCarloMultiGPU.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -109,6 +109,6 @@ - + diff --git a/Samples/5_Domain_Specific/MonteCarloMultiGPU/NsightEclipse.xml b/Samples/5_Domain_Specific/MonteCarloMultiGPU/NsightEclipse.xml index 34d2d8e0..bbe55936 100644 --- a/Samples/5_Domain_Specific/MonteCarloMultiGPU/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/MonteCarloMultiGPU/NsightEclipse.xml @@ -3,23 +3,23 @@ MonteCarloMultiGPU - cudaMemset - cudaFree cudaStreamDestroy - cudaEventRecord - cudaMallocHost - cudaStreamCreate - cudaEventCreate - cudaGetDeviceCount - cudaDeviceSynchronize - cudaEventSynchronize - cudaFreeHost cudaMalloc - cudaEventDestroy + cudaFree + cudaMallocHost cudaSetDevice - cudaMemcpyAsync - cudaStreamSynchronize + cudaEventSynchronize cudaGetDeviceProperties + cudaDeviceSynchronize + cudaEventRecord + cudaFreeHost + cudaMemset + cudaStreamSynchronize + cudaEventDestroy + cudaMemcpyAsync + cudaStreamCreate + cudaGetDeviceCount + cudaEventCreate whole @@ -69,6 +69,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/5_Domain_Specific/MonteCarloMultiGPU/README.md b/Samples/5_Domain_Specific/MonteCarloMultiGPU/README.md index 7a0f77cb..5eff98b7 100644 --- a/Samples/5_Domain_Specific/MonteCarloMultiGPU/README.md +++ b/Samples/5_Domain_Specific/MonteCarloMultiGPU/README.md @@ -10,7 +10,7 @@ Random Number Generator, Computational Finance, CURAND Library ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 
](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemset, cudaFree, cudaStreamDestroy, cudaEventRecord, cudaMallocHost, cudaStreamCreate, cudaEventCreate, cudaGetDeviceCount, cudaDeviceSynchronize, cudaEventSynchronize, cudaFreeHost, cudaMalloc, cudaEventDestroy, cudaSetDevice, cudaMemcpyAsync, cudaStreamSynchronize, cudaGetDeviceProperties +cudaStreamDestroy, cudaMalloc, cudaFree, cudaMallocHost, cudaSetDevice, cudaEventSynchronize, cudaGetDeviceProperties, cudaDeviceSynchronize, cudaEventRecord, cudaFreeHost, cudaMemset, cudaStreamSynchronize, cudaEventDestroy, cudaMemcpyAsync, cudaStreamCreate, cudaGetDeviceCount, cudaEventCreate ## Dependencies needed to build/run [CURAND](../../../README.md#curand) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/5_Domain_Specific/NV12toBGRandResize/Makefile b/Samples/5_Domain_Specific/NV12toBGRandResize/Makefile index cb4fefd6..f0ea1f7c 100644 --- a/Samples/5_Domain_Specific/NV12toBGRandResize/Makefile +++ b/Samples/5_Domain_Specific/NV12toBGRandResize/Makefile @@ -285,9 +285,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/5_Domain_Specific/NV12toBGRandResize/NV12toBGRandResize_vs2017.vcxproj b/Samples/5_Domain_Specific/NV12toBGRandResize/NV12toBGRandResize_vs2017.vcxproj index fe9e7d3b..845bded6 100644 --- a/Samples/5_Domain_Specific/NV12toBGRandResize/NV12toBGRandResize_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/NV12toBGRandResize/NV12toBGRandResize_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/NV12toBGRandResize.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -112,6 +112,6 @@ - + diff --git a/Samples/5_Domain_Specific/NV12toBGRandResize/NV12toBGRandResize_vs2019.vcxproj b/Samples/5_Domain_Specific/NV12toBGRandResize/NV12toBGRandResize_vs2019.vcxproj index b4c0cf63..88c92e39 100644 --- a/Samples/5_Domain_Specific/NV12toBGRandResize/NV12toBGRandResize_vs2019.vcxproj +++ 
b/Samples/5_Domain_Specific/NV12toBGRandResize/NV12toBGRandResize_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/NV12toBGRandResize.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -108,6 +108,6 @@ - + diff --git a/Samples/5_Domain_Specific/NV12toBGRandResize/NV12toBGRandResize_vs2022.vcxproj b/Samples/5_Domain_Specific/NV12toBGRandResize/NV12toBGRandResize_vs2022.vcxproj index 290d571c..09b89f94 100644 --- a/Samples/5_Domain_Specific/NV12toBGRandResize/NV12toBGRandResize_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/NV12toBGRandResize/NV12toBGRandResize_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/NV12toBGRandResize.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -108,6 +108,6 @@ - + diff --git a/Samples/5_Domain_Specific/NV12toBGRandResize/NsightEclipse.xml b/Samples/5_Domain_Specific/NV12toBGRandResize/NsightEclipse.xml index 6cd41314..cec28308 100644 --- a/Samples/5_Domain_Specific/NV12toBGRandResize/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/NV12toBGRandResize/NsightEclipse.xml @@ -3,21 +3,21 @@ NV12toBGRandResize + cudaMemcpy cudaStreamDestroy + cudaMalloc cudaFree - cudaEventRecord cudaMallocManaged - cudaStreamCreate - cudaEventCreate - cudaEventElapsedTime - 
cudaDeviceSynchronize + cudaStreamAttachMemAsync cudaDestroyTextureObject cudaEventSynchronize - cudaStreamAttachMemAsync + cudaDeviceSynchronize cudaCreateTextureObject - cudaMalloc + cudaEventRecord cudaEventDestroy - cudaMemcpy + cudaEventElapsedTime + cudaStreamCreate + cudaEventCreate whole @@ -58,6 +58,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/5_Domain_Specific/NV12toBGRandResize/README.md b/Samples/5_Domain_Specific/NV12toBGRandResize/README.md index 738dd5b3..f7899913 100644 --- a/Samples/5_Domain_Specific/NV12toBGRandResize/README.md +++ b/Samples/5_Domain_Specific/NV12toBGRandResize/README.md @@ -10,7 +10,7 @@ Graphics Interop, Image Processing, Video Processing ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 
](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, aarch64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaStreamDestroy, cudaFree, cudaEventRecord, cudaMallocManaged, cudaStreamCreate, cudaEventCreate, cudaEventElapsedTime, cudaDeviceSynchronize, cudaDestroyTextureObject, cudaEventSynchronize, cudaStreamAttachMemAsync, cudaCreateTextureObject, cudaMalloc, cudaEventDestroy, cudaMemcpy +cudaMemcpy, cudaStreamDestroy, cudaMalloc, cudaFree, cudaMallocManaged, cudaStreamAttachMemAsync, cudaDestroyTextureObject, cudaEventSynchronize, cudaDeviceSynchronize, cudaCreateTextureObject, cudaEventRecord, cudaEventDestroy, cudaEventElapsedTime, cudaStreamCreate, cudaEventCreate ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/5_Domain_Specific/SLID3D10Texture/README.md b/Samples/5_Domain_Specific/SLID3D10Texture/README.md index 0c62e600..ddd18f04 100644 --- a/Samples/5_Domain_Specific/SLID3D10Texture/README.md +++ b/Samples/5_Domain_Specific/SLID3D10Texture/README.md @@ -10,7 +10,7 @@ Performance Strategies, Graphics Interop, Image Processing, 2D Textures ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -26,14 +26,14 @@ x86_64 cuCtxPushCurrent, cuCtxPopCurrent ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemset, cudaFree, cudaGraphicsMapResources, cudaGraphicsUnregisterResource, 
cudaGraphicsResourceSetMapFlags, cudaGetDeviceCount, cudaMallocPitch, cudaMalloc, cudaGraphicsSubResourceGetMappedArray, cudaGetLastError, cudaGraphicsUnmapResources, cudaGetErrorString, cudaGetDeviceProperties +cudaGraphicsUnmapResources, cudaMalloc, cudaMallocPitch, cudaGetErrorString, cudaFree, cudaGraphicsResourceSetMapFlags, cudaGetLastError, cudaGraphicsMapResources, cudaGetDeviceCount, cudaMemset, cudaGraphicsUnregisterResource, cudaGraphicsSubResourceGetMappedArray, cudaGetDeviceProperties ## Dependencies needed to build/run [DirectX](../../../README.md#directx) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/5_Domain_Specific/SLID3D10Texture/SLID3D10Texture_vs2017.vcxproj b/Samples/5_Domain_Specific/SLID3D10Texture/SLID3D10Texture_vs2017.vcxproj index 4e8e897e..3b3d82ca 100644 --- a/Samples/5_Domain_Specific/SLID3D10Texture/SLID3D10Texture_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/SLID3D10Texture/SLID3D10Texture_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/SLID3D10Texture.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -109,6 +109,6 @@ - + diff --git a/Samples/5_Domain_Specific/SLID3D10Texture/SLID3D10Texture_vs2019.vcxproj b/Samples/5_Domain_Specific/SLID3D10Texture/SLID3D10Texture_vs2019.vcxproj index a7944ca5..96c3165a 
100644 --- a/Samples/5_Domain_Specific/SLID3D10Texture/SLID3D10Texture_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/SLID3D10Texture/SLID3D10Texture_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/SLID3D10Texture.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -105,6 +105,6 @@ - + diff --git a/Samples/5_Domain_Specific/SLID3D10Texture/SLID3D10Texture_vs2022.vcxproj b/Samples/5_Domain_Specific/SLID3D10Texture/SLID3D10Texture_vs2022.vcxproj index b0a7dccf..e26cc9a4 100644 --- a/Samples/5_Domain_Specific/SLID3D10Texture/SLID3D10Texture_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/SLID3D10Texture/SLID3D10Texture_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/SLID3D10Texture.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -105,6 +105,6 @@ - + diff --git a/Samples/5_Domain_Specific/SobelFilter/Makefile b/Samples/5_Domain_Specific/SobelFilter/Makefile index bc7f79a2..16893f8c 100644 --- a/Samples/5_Domain_Specific/SobelFilter/Makefile +++ b/Samples/5_Domain_Specific/SobelFilter/Makefile @@ -299,9 +299,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 
60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/5_Domain_Specific/SobelFilter/NsightEclipse.xml b/Samples/5_Domain_Specific/SobelFilter/NsightEclipse.xml index 48e1cb0d..1409139c 100644 --- a/Samples/5_Domain_Specific/SobelFilter/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/SobelFilter/NsightEclipse.xml @@ -3,20 +3,20 @@ SobelFilter - cudaFree - cudaGraphicsMapResources - cudaFreeArray - cudaGraphicsGLRegisterBuffer - cudaGraphicsResourceGetMappedPointer - cudaDeviceSynchronize - cudaDestroyTextureObject - cudaCreateTextureObject - cudaMalloc - cudaMallocArray - cudaGraphicsUnregisterResource cudaGraphicsUnmapResources cudaMemcpy + cudaMallocArray + cudaFreeArray + cudaFree cudaGetErrorString + cudaGraphicsResourceGetMappedPointer + cudaGraphicsMapResources + cudaDestroyTextureObject + cudaDeviceSynchronize + cudaCreateTextureObject + cudaGraphicsUnregisterResource + cudaMalloc + cudaGraphicsGLRegisterBuffer whole @@ -89,6 +89,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/5_Domain_Specific/SobelFilter/README.md b/Samples/5_Domain_Specific/SobelFilter/README.md index 2c8b2564..f33e8df7 100644 --- a/Samples/5_Domain_Specific/SobelFilter/README.md +++ b/Samples/5_Domain_Specific/SobelFilter/README.md @@ -10,7 +10,7 @@ Graphics Interop, Image Processing ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 
](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaGraphicsMapResources, cudaFreeArray, cudaGraphicsGLRegisterBuffer, cudaGraphicsResourceGetMappedPointer, cudaDeviceSynchronize, cudaDestroyTextureObject, cudaCreateTextureObject, cudaMalloc, cudaMallocArray, cudaGraphicsUnregisterResource, cudaGraphicsUnmapResources, cudaMemcpy, cudaGetErrorString +cudaGraphicsUnmapResources, cudaMemcpy, cudaMallocArray, cudaFreeArray, cudaFree, cudaGetErrorString, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDestroyTextureObject, cudaDeviceSynchronize, cudaCreateTextureObject, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer ## Dependencies needed to build/run [X11](../../../README.md#x11), [GL](../../../README.md#gl) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/5_Domain_Specific/SobelFilter/SobelFilter_vs2017.vcxproj b/Samples/5_Domain_Specific/SobelFilter/SobelFilter_vs2017.vcxproj index 4914918d..bb26dfe6 100644 --- a/Samples/5_Domain_Specific/SobelFilter/SobelFilter_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/SobelFilter/SobelFilter_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/SobelFilter.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -118,6 +118,6 @@ - + diff --git a/Samples/5_Domain_Specific/SobelFilter/SobelFilter_vs2019.vcxproj b/Samples/5_Domain_Specific/SobelFilter/SobelFilter_vs2019.vcxproj index 6bef3010..8dcd7a83 100644 --- a/Samples/5_Domain_Specific/SobelFilter/SobelFilter_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/SobelFilter/SobelFilter_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/SobelFilter.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -114,6 +114,6 @@ - + diff --git a/Samples/5_Domain_Specific/SobelFilter/SobelFilter_vs2022.vcxproj b/Samples/5_Domain_Specific/SobelFilter/SobelFilter_vs2022.vcxproj index cffc5757..d9f50ec3 100644 --- a/Samples/5_Domain_Specific/SobelFilter/SobelFilter_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/SobelFilter/SobelFilter_vs2022.vcxproj 
@@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/SobelFilter.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -114,6 +114,6 @@ - + diff --git a/Samples/5_Domain_Specific/SobelFilter/findgllib.mk b/Samples/5_Domain_Specific/SobelFilter/findgllib.mk index f0a5c551..998fcf0f 100644 --- a/Samples/5_Domain_Specific/SobelFilter/findgllib.mk +++ b/Samples/5_Domain_Specific/SobelFilter/findgllib.mk @@ -53,11 +53,12 @@ endif ifeq ("$(TARGET_OS)","linux") # $(info) >> findgllib.mk -> LINUX path <<<) # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside - UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) - FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) - RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) - CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) + UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) + FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) + RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) + CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) 
ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux") DFLT_PATH ?= /usr/lib endif endif + ifeq ("$(SUSE)","0") GLPATH ?= /usr/X11R6/lib64 GLLINK ?= -L/usr/X11R6/lib64 DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(FEDORA)","0") + else GLPATH ?= /usr/lib64/nvidia GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif - ifeq ("$(RHEL)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(CENTOS)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - + # find libGL, libGLU GLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so -print 2>/dev/null) GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null) diff --git a/Samples/5_Domain_Specific/SobolQRNG/Makefile b/Samples/5_Domain_Specific/SobolQRNG/Makefile index 080a5c7c..7d80c57e 100644 --- a/Samples/5_Domain_Specific/SobolQRNG/Makefile +++ b/Samples/5_Domain_Specific/SobolQRNG/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/5_Domain_Specific/SobolQRNG/NsightEclipse.xml b/Samples/5_Domain_Specific/SobolQRNG/NsightEclipse.xml index d12d97cc..cddf025f 100644 --- a/Samples/5_Domain_Specific/SobolQRNG/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/SobolQRNG/NsightEclipse.xml @@ -3,13 +3,13 @@ SobolQRNG - cudaFree - cudaDeviceSynchronize - cudaMalloc cudaMemcpy cudaGetErrorString - cudaGetDeviceProperties + cudaFree + cudaDeviceSynchronize cudaGetDevice + cudaMalloc + cudaGetDeviceProperties whole @@ -50,6 +50,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/5_Domain_Specific/SobolQRNG/README.md 
b/Samples/5_Domain_Specific/SobolQRNG/README.md index 13789f59..72bb3f84 100644 --- a/Samples/5_Domain_Specific/SobolQRNG/README.md +++ b/Samples/5_Domain_Specific/SobolQRNG/README.md @@ -10,7 +10,7 @@ Computational Finance ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties, cudaGetDevice +cudaMemcpy, cudaGetErrorString, cudaFree, cudaDeviceSynchronize, cudaGetDevice, cudaMalloc, 
cudaGetDeviceProperties ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/5_Domain_Specific/SobolQRNG/SobolQRNG_vs2017.vcxproj b/Samples/5_Domain_Specific/SobolQRNG/SobolQRNG_vs2017.vcxproj index ba88ef6a..2dc8daa7 100644 --- a/Samples/5_Domain_Specific/SobolQRNG/SobolQRNG_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/SobolQRNG/SobolQRNG_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/SobolQRNG.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -113,6 +113,6 @@ - + diff --git a/Samples/5_Domain_Specific/SobolQRNG/SobolQRNG_vs2019.vcxproj b/Samples/5_Domain_Specific/SobolQRNG/SobolQRNG_vs2019.vcxproj index acb7b91a..1f741088 100644 --- a/Samples/5_Domain_Specific/SobolQRNG/SobolQRNG_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/SobolQRNG/SobolQRNG_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/SobolQRNG.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -109,6 +109,6 @@ - + diff --git 
a/Samples/5_Domain_Specific/SobolQRNG/SobolQRNG_vs2022.vcxproj b/Samples/5_Domain_Specific/SobolQRNG/SobolQRNG_vs2022.vcxproj index 26344f69..d54706be 100644 --- a/Samples/5_Domain_Specific/SobolQRNG/SobolQRNG_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/SobolQRNG/SobolQRNG_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/SobolQRNG.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -109,6 +109,6 @@ - + diff --git a/Samples/5_Domain_Specific/VFlockingD3D10/README.md b/Samples/5_Domain_Specific/VFlockingD3D10/README.md index 0db3f2f8..613b5663 100644 --- a/Samples/5_Domain_Specific/VFlockingD3D10/README.md +++ b/Samples/5_Domain_Specific/VFlockingD3D10/README.md @@ -10,7 +10,7 @@ Graphics Interop, Data Parallel Algorithms, Physically-Based Simulation, Perform ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 
5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemset, cudaFree, cudaGraphicsMapResources, cudaEventRecord, cudaGraphicsUnregisterResource, cudaEventCreate, cudaGraphicsResourceGetMappedPointer, cudaGetDeviceCount, cudaEventElapsedTime, cudaDeviceSynchronize, cudaEventSynchronize, cudaGraphicsResourceSetMapFlags, cudaMalloc, cudaGetLastError, cudaGraphicsUnmapResources, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties +cudaGraphicsUnmapResources, cudaMemcpy, cudaFree, cudaGetErrorString, cudaGraphicsResourceSetMapFlags, cudaGraphicsResourceGetMappedPointer, cudaGetLastError, cudaGraphicsMapResources, cudaEventSynchronize, cudaGetDeviceProperties, cudaDeviceSynchronize, cudaEventRecord, cudaMemset, cudaGraphicsUnregisterResource, cudaMalloc, cudaEventElapsedTime, cudaGetDeviceCount, cudaEventCreate ## Dependencies needed to build/run [DirectX](../../../README.md#directx) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/5_Domain_Specific/VFlockingD3D10/VFlockingD3D10_vs2017.vcxproj b/Samples/5_Domain_Specific/VFlockingD3D10/VFlockingD3D10_vs2017.vcxproj index 37fe522b..7744ff26 100644 --- a/Samples/5_Domain_Specific/VFlockingD3D10/VFlockingD3D10_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/VFlockingD3D10/VFlockingD3D10_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/VFlockingD3D10.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -110,6 +110,6 @@ - + diff --git a/Samples/5_Domain_Specific/VFlockingD3D10/VFlockingD3D10_vs2019.vcxproj b/Samples/5_Domain_Specific/VFlockingD3D10/VFlockingD3D10_vs2019.vcxproj index de8440f7..3c776cc9 100644 --- a/Samples/5_Domain_Specific/VFlockingD3D10/VFlockingD3D10_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/VFlockingD3D10/VFlockingD3D10_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/VFlockingD3D10.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -106,6 +106,6 @@ - + diff --git a/Samples/5_Domain_Specific/VFlockingD3D10/VFlockingD3D10_vs2022.vcxproj b/Samples/5_Domain_Specific/VFlockingD3D10/VFlockingD3D10_vs2022.vcxproj index 0213cd64..65635ece 100644 --- a/Samples/5_Domain_Specific/VFlockingD3D10/VFlockingD3D10_vs2022.vcxproj 
+++ b/Samples/5_Domain_Specific/VFlockingD3D10/VFlockingD3D10_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/VFlockingD3D10.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -106,6 +106,6 @@ - + diff --git a/Samples/5_Domain_Specific/bicubicTexture/Makefile b/Samples/5_Domain_Specific/bicubicTexture/Makefile index 2e3a5627..cc6c2680 100644 --- a/Samples/5_Domain_Specific/bicubicTexture/Makefile +++ b/Samples/5_Domain_Specific/bicubicTexture/Makefile @@ -299,9 +299,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/5_Domain_Specific/bicubicTexture/NsightEclipse.xml b/Samples/5_Domain_Specific/bicubicTexture/NsightEclipse.xml index b9f435b0..bb42ff84 100644 --- a/Samples/5_Domain_Specific/bicubicTexture/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/bicubicTexture/NsightEclipse.xml @@ -3,20 +3,20 @@ bicubicTexture - cudaFree - cudaGraphicsMapResources - cudaFreeArray - cudaGraphicsGLRegisterBuffer - cudaGraphicsResourceGetMappedPointer - cudaDeviceSynchronize - cudaDestroyTextureObject - cudaCreateTextureObject - cudaMalloc - cudaMallocArray - cudaCreateChannelDesc - cudaGraphicsUnregisterResource cudaGraphicsUnmapResources + cudaCreateChannelDesc + cudaMallocArray + cudaFreeArray + cudaFree cudaMemcpy + cudaGraphicsResourceGetMappedPointer + cudaGraphicsMapResources + cudaDestroyTextureObject + cudaDeviceSynchronize + 
cudaCreateTextureObject + cudaGraphicsUnregisterResource + cudaMalloc + cudaGraphicsGLRegisterBuffer cudaGetDeviceProperties @@ -86,6 +86,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/5_Domain_Specific/bicubicTexture/README.md b/Samples/5_Domain_Specific/bicubicTexture/README.md index db1fc40f..c972ebda 100644 --- a/Samples/5_Domain_Specific/bicubicTexture/README.md +++ b/Samples/5_Domain_Specific/bicubicTexture/README.md @@ -10,7 +10,7 @@ Graphics Interop, Image Processing ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, armv7l ## CUDA APIs involved ### [CUDA Runtime 
API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaGraphicsMapResources, cudaFreeArray, cudaGraphicsGLRegisterBuffer, cudaGraphicsResourceGetMappedPointer, cudaDeviceSynchronize, cudaDestroyTextureObject, cudaCreateTextureObject, cudaMalloc, cudaMallocArray, cudaCreateChannelDesc, cudaGraphicsUnregisterResource, cudaGraphicsUnmapResources, cudaMemcpy, cudaGetDeviceProperties +cudaGraphicsUnmapResources, cudaCreateChannelDesc, cudaMallocArray, cudaFreeArray, cudaFree, cudaMemcpy, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDestroyTextureObject, cudaDeviceSynchronize, cudaCreateTextureObject, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer, cudaGetDeviceProperties ## Dependencies needed to build/run [X11](../../../README.md#x11), [GL](../../../README.md#gl) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/5_Domain_Specific/bicubicTexture/bicubicTexture_vs2017.vcxproj b/Samples/5_Domain_Specific/bicubicTexture/bicubicTexture_vs2017.vcxproj index f7c07a0e..11e527bb 100644 --- a/Samples/5_Domain_Specific/bicubicTexture/bicubicTexture_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/bicubicTexture/bicubicTexture_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/bicubicTexture.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -118,6 +118,6 @@ - + diff --git a/Samples/5_Domain_Specific/bicubicTexture/bicubicTexture_vs2019.vcxproj b/Samples/5_Domain_Specific/bicubicTexture/bicubicTexture_vs2019.vcxproj index 9a4ebd55..dbb64abb 100644 --- a/Samples/5_Domain_Specific/bicubicTexture/bicubicTexture_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/bicubicTexture/bicubicTexture_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/bicubicTexture.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -114,6 +114,6 @@ - + diff --git a/Samples/5_Domain_Specific/bicubicTexture/bicubicTexture_vs2022.vcxproj b/Samples/5_Domain_Specific/bicubicTexture/bicubicTexture_vs2022.vcxproj index 9bb627c0..a54bf6b9 100644 --- a/Samples/5_Domain_Specific/bicubicTexture/bicubicTexture_vs2022.vcxproj 
+++ b/Samples/5_Domain_Specific/bicubicTexture/bicubicTexture_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/bicubicTexture.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -114,6 +114,6 @@ - + diff --git a/Samples/5_Domain_Specific/bicubicTexture/data/teapot512.pgm b/Samples/5_Domain_Specific/bicubicTexture/data/teapot512.pgm index 23db9b9a..4e5ca459 100644 Binary files a/Samples/5_Domain_Specific/bicubicTexture/data/teapot512.pgm and b/Samples/5_Domain_Specific/bicubicTexture/data/teapot512.pgm differ diff --git a/Samples/5_Domain_Specific/bicubicTexture/findgllib.mk b/Samples/5_Domain_Specific/bicubicTexture/findgllib.mk index f0a5c551..998fcf0f 100644 --- a/Samples/5_Domain_Specific/bicubicTexture/findgllib.mk +++ b/Samples/5_Domain_Specific/bicubicTexture/findgllib.mk @@ -53,11 +53,12 @@ endif ifeq ("$(TARGET_OS)","linux") # $(info) >> findgllib.mk -> LINUX path <<<) # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside - UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) - FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) - RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) - CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) + UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) + FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) + RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) 
+ CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux") DFLT_PATH ?= /usr/lib endif endif + ifeq ("$(SUSE)","0") GLPATH ?= /usr/X11R6/lib64 GLLINK ?= -L/usr/X11R6/lib64 DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(FEDORA)","0") + else GLPATH ?= /usr/lib64/nvidia GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif - ifeq ("$(RHEL)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(CENTOS)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - + # find libGL, libGLU GLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so -print 2>/dev/null) GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null) diff --git a/Samples/5_Domain_Specific/bilateralFilter/Makefile b/Samples/5_Domain_Specific/bilateralFilter/Makefile index 800dfc13..2a4ee06b 100644 --- a/Samples/5_Domain_Specific/bilateralFilter/Makefile +++ b/Samples/5_Domain_Specific/bilateralFilter/Makefile @@ -299,9 +299,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/5_Domain_Specific/bilateralFilter/NsightEclipse.xml b/Samples/5_Domain_Specific/bilateralFilter/NsightEclipse.xml index 8ca67bd8..b9e13ffa 100644 --- a/Samples/5_Domain_Specific/bilateralFilter/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/bilateralFilter/NsightEclipse.xml @@ -3,18 +3,18 @@ bilateralFilter - cudaMemcpyToSymbol - 
cudaFree - cudaGraphicsMapResources - cudaGraphicsGLRegisterBuffer - cudaGraphicsResourceGetMappedPointer - cudaDeviceSynchronize - cudaDestroyTextureObject - cudaMallocPitch - cudaCreateTextureObject - cudaGraphicsUnregisterResource - cudaGraphicsUnmapResources cudaRuntimeGetVersion + cudaGraphicsUnmapResources + cudaMallocPitch + cudaFree + cudaGraphicsResourceGetMappedPointer + cudaGraphicsMapResources + cudaDestroyTextureObject + cudaDeviceSynchronize + cudaCreateTextureObject + cudaMemcpyToSymbol + cudaGraphicsUnregisterResource + cudaGraphicsGLRegisterBuffer cudaGetDeviceProperties @@ -86,6 +86,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/5_Domain_Specific/bilateralFilter/README.md b/Samples/5_Domain_Specific/bilateralFilter/README.md index 8741c589..b31f086f 100644 --- a/Samples/5_Domain_Specific/bilateralFilter/README.md +++ b/Samples/5_Domain_Specific/bilateralFilter/README.md @@ -10,7 +10,7 @@ Graphics Interop, Image Processing ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 
7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemcpyToSymbol, cudaFree, cudaGraphicsMapResources, cudaGraphicsGLRegisterBuffer, cudaGraphicsResourceGetMappedPointer, cudaDeviceSynchronize, cudaDestroyTextureObject, cudaMallocPitch, cudaCreateTextureObject, cudaGraphicsUnregisterResource, cudaGraphicsUnmapResources, cudaRuntimeGetVersion, cudaGetDeviceProperties +cudaRuntimeGetVersion, cudaGraphicsUnmapResources, cudaMallocPitch, cudaFree, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDestroyTextureObject, cudaDeviceSynchronize, cudaCreateTextureObject, cudaMemcpyToSymbol, cudaGraphicsUnregisterResource, cudaGraphicsGLRegisterBuffer, cudaGetDeviceProperties ## Dependencies needed to build/run [X11](../../../README.md#x11), [GL](../../../README.md#gl) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/5_Domain_Specific/bilateralFilter/bilateralFilter_vs2017.vcxproj b/Samples/5_Domain_Specific/bilateralFilter/bilateralFilter_vs2017.vcxproj index 5a64d5c9..66d5cb16 100644 --- a/Samples/5_Domain_Specific/bilateralFilter/bilateralFilter_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/bilateralFilter/bilateralFilter_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/bilateralFilter.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -120,6 +120,6 @@ - + diff --git a/Samples/5_Domain_Specific/bilateralFilter/bilateralFilter_vs2019.vcxproj b/Samples/5_Domain_Specific/bilateralFilter/bilateralFilter_vs2019.vcxproj index 23174936..90816212 100644 --- a/Samples/5_Domain_Specific/bilateralFilter/bilateralFilter_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/bilateralFilter/bilateralFilter_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/bilateralFilter.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -116,6 +116,6 @@ - + diff --git a/Samples/5_Domain_Specific/bilateralFilter/bilateralFilter_vs2022.vcxproj b/Samples/5_Domain_Specific/bilateralFilter/bilateralFilter_vs2022.vcxproj index 3a4c27a8..8f7f94ad 100644 --- 
a/Samples/5_Domain_Specific/bilateralFilter/bilateralFilter_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/bilateralFilter/bilateralFilter_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/bilateralFilter.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -116,6 +116,6 @@ - + diff --git a/Samples/5_Domain_Specific/bilateralFilter/findgllib.mk b/Samples/5_Domain_Specific/bilateralFilter/findgllib.mk index f0a5c551..998fcf0f 100644 --- a/Samples/5_Domain_Specific/bilateralFilter/findgllib.mk +++ b/Samples/5_Domain_Specific/bilateralFilter/findgllib.mk @@ -53,11 +53,12 @@ endif ifeq ("$(TARGET_OS)","linux") # $(info) >> findgllib.mk -> LINUX path <<<) # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside - UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) - FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) - RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) - CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) + UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) + FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) + RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) + CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) 
ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux") DFLT_PATH ?= /usr/lib endif endif + ifeq ("$(SUSE)","0") GLPATH ?= /usr/X11R6/lib64 GLLINK ?= -L/usr/X11R6/lib64 DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(FEDORA)","0") + else GLPATH ?= /usr/lib64/nvidia GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif - ifeq ("$(RHEL)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(CENTOS)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - + # find libGL, libGLU GLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so -print 2>/dev/null) GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null) diff --git a/Samples/5_Domain_Specific/binomialOptions/Makefile b/Samples/5_Domain_Specific/binomialOptions/Makefile index 2c85e8ce..7d498db9 100644 --- a/Samples/5_Domain_Specific/binomialOptions/Makefile +++ b/Samples/5_Domain_Specific/binomialOptions/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/5_Domain_Specific/binomialOptions/NsightEclipse.xml b/Samples/5_Domain_Specific/binomialOptions/NsightEclipse.xml index 0730f13d..f5273e49 100644 --- a/Samples/5_Domain_Specific/binomialOptions/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/binomialOptions/NsightEclipse.xml @@ -3,8 +3,8 @@ binomialOptions - cudaMemcpyToSymbol cudaDeviceSynchronize + cudaMemcpyToSymbol cudaMemcpyFromSymbol @@ -46,6 +46,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/5_Domain_Specific/binomialOptions/README.md b/Samples/5_Domain_Specific/binomialOptions/README.md 
index 869a40cf..574d7e1f 100644 --- a/Samples/5_Domain_Specific/binomialOptions/README.md +++ b/Samples/5_Domain_Specific/binomialOptions/README.md @@ -10,7 +10,7 @@ Computational Finance ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemcpyToSymbol, cudaDeviceSynchronize, cudaMemcpyFromSymbol +cudaDeviceSynchronize, cudaMemcpyToSymbol, cudaMemcpyFromSymbol ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding 
platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/5_Domain_Specific/binomialOptions/binomialOptions_vs2017.vcxproj b/Samples/5_Domain_Specific/binomialOptions/binomialOptions_vs2017.vcxproj index a5a832ab..8416e033 100644 --- a/Samples/5_Domain_Specific/binomialOptions/binomialOptions_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/binomialOptions/binomialOptions_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/binomialOptions.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -110,6 +110,6 @@ - + diff --git a/Samples/5_Domain_Specific/binomialOptions/binomialOptions_vs2019.vcxproj b/Samples/5_Domain_Specific/binomialOptions/binomialOptions_vs2019.vcxproj index 95af1438..806fed73 100644 --- a/Samples/5_Domain_Specific/binomialOptions/binomialOptions_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/binomialOptions/binomialOptions_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/binomialOptions.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -106,6 +106,6 @@ - + diff --git a/Samples/5_Domain_Specific/binomialOptions/binomialOptions_vs2022.vcxproj 
b/Samples/5_Domain_Specific/binomialOptions/binomialOptions_vs2022.vcxproj index 7ff63d93..616a1dc6 100644 --- a/Samples/5_Domain_Specific/binomialOptions/binomialOptions_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/binomialOptions/binomialOptions_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/binomialOptions.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -106,6 +106,6 @@ - + diff --git a/Samples/5_Domain_Specific/binomialOptions_nvrtc/README.md b/Samples/5_Domain_Specific/binomialOptions_nvrtc/README.md index 67b0cf18..0d96c7ef 100644 --- a/Samples/5_Domain_Specific/binomialOptions_nvrtc/README.md +++ b/Samples/5_Domain_Specific/binomialOptions_nvrtc/README.md @@ -10,7 +10,7 @@ Computational Finance, Runtime Compilation ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 
](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,7 +23,7 @@ x86_64, ppc64le, aarch64 ## CUDA APIs involved ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html) -cuLaunchKernel, cuModuleGetGlobal, cuCtxSynchronize, cuMemcpyDtoH, cuModuleGetFunction, cuMemcpyHtoD +cuMemcpyDtoH, cuLaunchKernel, cuMemcpyHtoD, cuModuleGetGlobal, cuCtxSynchronize, cuModuleGetFunction ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) cudaBlockSize, cudaGridSize @@ -33,7 +33,7 @@ cudaBlockSize, cudaGridSize ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_nvrtc_vs2017.vcxproj b/Samples/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_nvrtc_vs2017.vcxproj index b5cd46de..4e020948 100644 --- a/Samples/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_nvrtc_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_nvrtc_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -112,6 +112,6 @@ - + diff --git a/Samples/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_nvrtc_vs2019.vcxproj b/Samples/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_nvrtc_vs2019.vcxproj index e79add30..153e2b1c 100644 --- a/Samples/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_nvrtc_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_nvrtc_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -108,6 +108,6 @@ - + diff --git a/Samples/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_nvrtc_vs2022.vcxproj b/Samples/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_nvrtc_vs2022.vcxproj index 7dbfcf7a..d2720c08 100644 --- a/Samples/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_nvrtc_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_nvrtc_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -108,6 +108,6 @@ - + diff --git a/Samples/5_Domain_Specific/convolutionFFT2D/Makefile b/Samples/5_Domain_Specific/convolutionFFT2D/Makefile index 35185f28..0528320a 100644 --- a/Samples/5_Domain_Specific/convolutionFFT2D/Makefile +++ b/Samples/5_Domain_Specific/convolutionFFT2D/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/5_Domain_Specific/convolutionFFT2D/NsightEclipse.xml 
b/Samples/5_Domain_Specific/convolutionFFT2D/NsightEclipse.xml index f197fa28..f8134055 100644 --- a/Samples/5_Domain_Specific/convolutionFFT2D/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/convolutionFFT2D/NsightEclipse.xml @@ -3,13 +3,13 @@ convolutionFFT2D - cudaMemset - cudaFree - cudaDeviceSynchronize - cudaDestroyTextureObject - cudaCreateTextureObject - cudaMalloc cudaMemcpy + cudaFree + cudaDestroyTextureObject + cudaDeviceSynchronize + cudaCreateTextureObject + cudaMemset + cudaMalloc whole @@ -58,6 +58,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/5_Domain_Specific/convolutionFFT2D/README.md b/Samples/5_Domain_Specific/convolutionFFT2D/README.md index 089b847d..0f8d5193 100644 --- a/Samples/5_Domain_Specific/convolutionFFT2D/README.md +++ b/Samples/5_Domain_Specific/convolutionFFT2D/README.md @@ -10,7 +10,7 @@ Image Processing, CUFFT Library ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 
](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemset, cudaFree, cudaDeviceSynchronize, cudaDestroyTextureObject, cudaCreateTextureObject, cudaMalloc, cudaMemcpy +cudaMemcpy, cudaFree, cudaDestroyTextureObject, cudaDeviceSynchronize, cudaCreateTextureObject, cudaMemset, cudaMalloc ## Dependencies needed to build/run [CUFFT](../../../README.md#cufft) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D_vs2017.vcxproj b/Samples/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D_vs2017.vcxproj index 997c7912..4fe20d85 100644 --- a/Samples/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/convolutionFFT2D.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -110,6 +110,6 @@ - + diff --git a/Samples/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D_vs2019.vcxproj b/Samples/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D_vs2019.vcxproj index 3156c87f..71bad451 100644 --- a/Samples/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/convolutionFFT2D.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -106,6 +106,6 @@ - + diff --git a/Samples/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D_vs2022.vcxproj b/Samples/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D_vs2022.vcxproj index 641a761c..a73a0ced 100644 --- 
a/Samples/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/convolutionFFT2D.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -106,6 +106,6 @@ - + diff --git a/Samples/5_Domain_Specific/dwtHaar1D/Makefile b/Samples/5_Domain_Specific/dwtHaar1D/Makefile index d24cac3f..abd407f4 100644 --- a/Samples/5_Domain_Specific/dwtHaar1D/Makefile +++ b/Samples/5_Domain_Specific/dwtHaar1D/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/5_Domain_Specific/dwtHaar1D/NsightEclipse.xml b/Samples/5_Domain_Specific/dwtHaar1D/NsightEclipse.xml index 386a5469..daa96121 100644 --- a/Samples/5_Domain_Specific/dwtHaar1D/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/dwtHaar1D/NsightEclipse.xml @@ -4,8 +4,8 @@ dwtHaar1D cudaMalloc - cudaFree cudaMemcpy + cudaFree whole @@ -46,6 +46,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/5_Domain_Specific/dwtHaar1D/README.md b/Samples/5_Domain_Specific/dwtHaar1D/README.md index 6d4eb998..da368a4e 100644 --- a/Samples/5_Domain_Specific/dwtHaar1D/README.md +++ b/Samples/5_Domain_Specific/dwtHaar1D/README.md @@ -10,7 +10,7 @@ Image Processing, Video Compression ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 
](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMalloc, cudaFree, cudaMemcpy +cudaMalloc, cudaMemcpy, cudaFree ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/5_Domain_Specific/dwtHaar1D/dwtHaar1D_vs2017.vcxproj b/Samples/5_Domain_Specific/dwtHaar1D/dwtHaar1D_vs2017.vcxproj index 6592422c..6b693574 100644 --- a/Samples/5_Domain_Specific/dwtHaar1D/dwtHaar1D_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/dwtHaar1D/dwtHaar1D_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/dwtHaar1D.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/5_Domain_Specific/dwtHaar1D/dwtHaar1D_vs2019.vcxproj b/Samples/5_Domain_Specific/dwtHaar1D/dwtHaar1D_vs2019.vcxproj index 9cd792c5..c191c4ff 100644 --- a/Samples/5_Domain_Specific/dwtHaar1D/dwtHaar1D_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/dwtHaar1D/dwtHaar1D_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/dwtHaar1D.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/5_Domain_Specific/dwtHaar1D/dwtHaar1D_vs2022.vcxproj b/Samples/5_Domain_Specific/dwtHaar1D/dwtHaar1D_vs2022.vcxproj index 739fd5e6..bba596d8 100644 --- a/Samples/5_Domain_Specific/dwtHaar1D/dwtHaar1D_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/dwtHaar1D/dwtHaar1D_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ 
$(OutDir)/dwtHaar1D.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/5_Domain_Specific/dxtc/Makefile b/Samples/5_Domain_Specific/dxtc/Makefile index 2de94434..b379d6c8 100644 --- a/Samples/5_Domain_Specific/dxtc/Makefile +++ b/Samples/5_Domain_Specific/dxtc/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/5_Domain_Specific/dxtc/NsightEclipse.xml b/Samples/5_Domain_Specific/dxtc/NsightEclipse.xml index f194bea9..a31d16cb 100644 --- a/Samples/5_Domain_Specific/dxtc/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/dxtc/NsightEclipse.xml @@ -3,12 +3,12 @@ dxtc + cudaMemcpy cudaFree cudaDeviceSynchronize - cudaMalloc - cudaMemcpy - cudaGetDeviceProperties cudaGetDevice + cudaMalloc + cudaGetDeviceProperties whole @@ -61,6 +61,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/5_Domain_Specific/dxtc/README.md b/Samples/5_Domain_Specific/dxtc/README.md index f45d97ea..3a805668 100644 --- a/Samples/5_Domain_Specific/dxtc/README.md +++ b/Samples/5_Domain_Specific/dxtc/README.md @@ -10,7 +10,7 @@ Cooperative Groups, Image Processing, Image Compression ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 
](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaMemcpy, cudaGetDeviceProperties, cudaGetDevice +cudaMemcpy, cudaFree, cudaDeviceSynchronize, cudaGetDevice, cudaMalloc, cudaGetDeviceProperties ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/5_Domain_Specific/dxtc/dxtc_vs2017.vcxproj b/Samples/5_Domain_Specific/dxtc/dxtc_vs2017.vcxproj index c655b304..e1d68433 100644 --- a/Samples/5_Domain_Specific/dxtc/dxtc_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/dxtc/dxtc_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/dxtc.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -109,6 +109,6 @@ - + diff --git a/Samples/5_Domain_Specific/dxtc/dxtc_vs2019.vcxproj b/Samples/5_Domain_Specific/dxtc/dxtc_vs2019.vcxproj index 53567caa..c1b7f6db 100644 --- a/Samples/5_Domain_Specific/dxtc/dxtc_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/dxtc/dxtc_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/dxtc.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -105,6 +105,6 @@ - + diff --git a/Samples/5_Domain_Specific/dxtc/dxtc_vs2022.vcxproj b/Samples/5_Domain_Specific/dxtc/dxtc_vs2022.vcxproj index bf3c5c25..8609b640 100644 --- a/Samples/5_Domain_Specific/dxtc/dxtc_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/dxtc/dxtc_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/dxtc.exe - 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -105,6 +105,6 @@ - + diff --git a/Samples/5_Domain_Specific/fastWalshTransform/Makefile b/Samples/5_Domain_Specific/fastWalshTransform/Makefile index 1ba7282a..3cf3f547 100644 --- a/Samples/5_Domain_Specific/fastWalshTransform/Makefile +++ b/Samples/5_Domain_Specific/fastWalshTransform/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/5_Domain_Specific/fastWalshTransform/NsightEclipse.xml b/Samples/5_Domain_Specific/fastWalshTransform/NsightEclipse.xml index a88475e9..9e627352 100644 --- a/Samples/5_Domain_Specific/fastWalshTransform/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/fastWalshTransform/NsightEclipse.xml @@ -3,11 +3,11 @@ fastWalshTransform - cudaMemset + cudaMemcpy cudaFree cudaDeviceSynchronize + cudaMemset cudaMalloc - cudaMemcpy whole @@ -54,6 +54,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/5_Domain_Specific/fastWalshTransform/README.md b/Samples/5_Domain_Specific/fastWalshTransform/README.md index 00dd99d2..473f4ce9 100644 --- a/Samples/5_Domain_Specific/fastWalshTransform/README.md +++ b/Samples/5_Domain_Specific/fastWalshTransform/README.md @@ -10,7 +10,7 @@ Linear Algebra, Data-Parallel Algorithms, Video Compression ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 
5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemset, cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaMemcpy +cudaMemcpy, cudaFree, cudaDeviceSynchronize, cudaMemset, cudaMalloc ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/5_Domain_Specific/fastWalshTransform/fastWalshTransform_vs2017.vcxproj b/Samples/5_Domain_Specific/fastWalshTransform/fastWalshTransform_vs2017.vcxproj index 6b2fb2f2..6ee445df 100644 --- a/Samples/5_Domain_Specific/fastWalshTransform/fastWalshTransform_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/fastWalshTransform/fastWalshTransform_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/fastWalshTransform.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -108,6 +108,6 @@ - + diff --git a/Samples/5_Domain_Specific/fastWalshTransform/fastWalshTransform_vs2019.vcxproj b/Samples/5_Domain_Specific/fastWalshTransform/fastWalshTransform_vs2019.vcxproj index d6fdbf34..71cafa08 100644 --- a/Samples/5_Domain_Specific/fastWalshTransform/fastWalshTransform_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/fastWalshTransform/fastWalshTransform_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/fastWalshTransform.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/5_Domain_Specific/fastWalshTransform/fastWalshTransform_vs2022.vcxproj b/Samples/5_Domain_Specific/fastWalshTransform/fastWalshTransform_vs2022.vcxproj index 
12977578..8e30886b 100644 --- a/Samples/5_Domain_Specific/fastWalshTransform/fastWalshTransform_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/fastWalshTransform/fastWalshTransform_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/fastWalshTransform.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -104,6 +104,6 @@ - + diff --git a/Samples/5_Domain_Specific/fluidsD3D9/README.md b/Samples/5_Domain_Specific/fluidsD3D9/README.md index 716a5aab..912936d8 100644 --- a/Samples/5_Domain_Specific/fluidsD3D9/README.md +++ b/Samples/5_Domain_Specific/fluidsD3D9/README.md @@ -10,7 +10,7 @@ Graphics Interop, CUFFT Library, Physically-Based Simulation ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 
](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaGraphicsMapResources, cudaGraphicsUnregisterResource, cudaFreeArray, cudaGraphicsResourceGetMappedPointer, cudaDestroyTextureObject, cudaMallocPitch, cudaCreateTextureObject, cudaMalloc, cudaMallocArray, cudaGetLastError, cudaGraphicsUnmapResources, cudaMemcpy, cudaGetDeviceProperties, cudaGetDevice +cudaGraphicsUnmapResources, cudaMemcpy, cudaMallocArray, cudaFreeArray, cudaFree, cudaMallocPitch, cudaGraphicsResourceGetMappedPointer, cudaGetLastError, cudaGraphicsMapResources, cudaDestroyTextureObject, cudaGetDevice, cudaCreateTextureObject, cudaGraphicsUnregisterResource, cudaMalloc, cudaGetDeviceProperties ## Dependencies needed to build/run [DirectX](../../../README.md#directx) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/5_Domain_Specific/fluidsD3D9/fluidsD3D9_vs2017.vcxproj b/Samples/5_Domain_Specific/fluidsD3D9/fluidsD3D9_vs2017.vcxproj index fe88c506..44221683 100644 --- a/Samples/5_Domain_Specific/fluidsD3D9/fluidsD3D9_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/fluidsD3D9/fluidsD3D9_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/fluidsD3D9.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -110,6 +110,6 @@ - + diff --git a/Samples/5_Domain_Specific/fluidsD3D9/fluidsD3D9_vs2019.vcxproj b/Samples/5_Domain_Specific/fluidsD3D9/fluidsD3D9_vs2019.vcxproj index c7cd7d37..3feb3a21 100644 --- a/Samples/5_Domain_Specific/fluidsD3D9/fluidsD3D9_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/fluidsD3D9/fluidsD3D9_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/fluidsD3D9.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -106,6 +106,6 @@ - + diff --git a/Samples/5_Domain_Specific/fluidsD3D9/fluidsD3D9_vs2022.vcxproj b/Samples/5_Domain_Specific/fluidsD3D9/fluidsD3D9_vs2022.vcxproj index 2ff8ad41..3dedaf00 100644 --- a/Samples/5_Domain_Specific/fluidsD3D9/fluidsD3D9_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/fluidsD3D9/fluidsD3D9_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ 
-63,7 +63,7 @@ $(OutDir)/fluidsD3D9.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -106,6 +106,6 @@ - + diff --git a/Samples/5_Domain_Specific/fluidsGL/Makefile b/Samples/5_Domain_Specific/fluidsGL/Makefile index 4596e5ed..1c987228 100644 --- a/Samples/5_Domain_Specific/fluidsGL/Makefile +++ b/Samples/5_Domain_Specific/fluidsGL/Makefile @@ -299,9 +299,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/5_Domain_Specific/fluidsGL/NsightEclipse.xml b/Samples/5_Domain_Specific/fluidsGL/NsightEclipse.xml index e2ebcaaa..96bb4ea0 100644 --- a/Samples/5_Domain_Specific/fluidsGL/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/fluidsGL/NsightEclipse.xml @@ -3,19 +3,19 @@ fluidsGL - cudaFree - cudaGraphicsMapResources - cudaFreeArray - cudaGraphicsGLRegisterBuffer - cudaGraphicsResourceGetMappedPointer - cudaDestroyTextureObject - cudaMallocPitch - cudaCreateTextureObject - cudaMalloc - cudaMallocArray - cudaGraphicsUnregisterResource cudaGraphicsUnmapResources cudaMemcpy + cudaMallocArray + cudaFreeArray + cudaFree + cudaMallocPitch + cudaGraphicsResourceGetMappedPointer + cudaGraphicsMapResources + cudaDestroyTextureObject + cudaCreateTextureObject + cudaGraphicsUnregisterResource + cudaMalloc + cudaGraphicsGLRegisterBuffer cudaGetDeviceProperties @@ -83,6 +83,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/5_Domain_Specific/fluidsGL/README.md 
b/Samples/5_Domain_Specific/fluidsGL/README.md index ea572321..0d492ebc 100644 --- a/Samples/5_Domain_Specific/fluidsGL/README.md +++ b/Samples/5_Domain_Specific/fluidsGL/README.md @@ -10,7 +10,7 @@ Graphics Interop, CUFFT Library, Physically-Based Simulation ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaGraphicsMapResources, cudaFreeArray, cudaGraphicsGLRegisterBuffer, cudaGraphicsResourceGetMappedPointer, cudaDestroyTextureObject, cudaMallocPitch, cudaCreateTextureObject, cudaMalloc, 
cudaMallocArray, cudaGraphicsUnregisterResource, cudaGraphicsUnmapResources, cudaMemcpy, cudaGetDeviceProperties +cudaGraphicsUnmapResources, cudaMemcpy, cudaMallocArray, cudaFreeArray, cudaFree, cudaMallocPitch, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDestroyTextureObject, cudaCreateTextureObject, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer, cudaGetDeviceProperties ## Dependencies needed to build/run [X11](../../../README.md#x11), [GL](../../../README.md#gl), [CUFFT](../../../README.md#cufft) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/5_Domain_Specific/fluidsGL/findgllib.mk b/Samples/5_Domain_Specific/fluidsGL/findgllib.mk index f0a5c551..998fcf0f 100644 --- a/Samples/5_Domain_Specific/fluidsGL/findgllib.mk +++ b/Samples/5_Domain_Specific/fluidsGL/findgllib.mk @@ -53,11 +53,12 @@ endif ifeq ("$(TARGET_OS)","linux") # $(info) >> findgllib.mk -> LINUX path <<<) # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside - UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) - FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) - RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) - CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) + UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) + FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) + RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) 
+ CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux") DFLT_PATH ?= /usr/lib endif endif + ifeq ("$(SUSE)","0") GLPATH ?= /usr/X11R6/lib64 GLLINK ?= -L/usr/X11R6/lib64 DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(FEDORA)","0") + else GLPATH ?= /usr/lib64/nvidia GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif - ifeq ("$(RHEL)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(CENTOS)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - + # find libGL, libGLU GLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so -print 2>/dev/null) GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null) diff --git a/Samples/5_Domain_Specific/fluidsGL/fluidsGL_vs2017.vcxproj b/Samples/5_Domain_Specific/fluidsGL/fluidsGL_vs2017.vcxproj index c90c550e..8d2822ef 100644 --- a/Samples/5_Domain_Specific/fluidsGL/fluidsGL_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/fluidsGL/fluidsGL_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/fluidsGL.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -120,6 +120,6 @@ - + diff --git a/Samples/5_Domain_Specific/fluidsGL/fluidsGL_vs2019.vcxproj 
b/Samples/5_Domain_Specific/fluidsGL/fluidsGL_vs2019.vcxproj index 1e2ee21e..aa2839e7 100644 --- a/Samples/5_Domain_Specific/fluidsGL/fluidsGL_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/fluidsGL/fluidsGL_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/fluidsGL.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -116,6 +116,6 @@ - + diff --git a/Samples/5_Domain_Specific/fluidsGL/fluidsGL_vs2022.vcxproj b/Samples/5_Domain_Specific/fluidsGL/fluidsGL_vs2022.vcxproj index 8e7553c7..eeae6a67 100644 --- a/Samples/5_Domain_Specific/fluidsGL/fluidsGL_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/fluidsGL/fluidsGL_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/fluidsGL.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -116,6 +116,6 @@ - + diff --git a/Samples/5_Domain_Specific/fluidsGLES/Makefile b/Samples/5_Domain_Specific/fluidsGLES/Makefile index 269debb0..00b514cf 100644 --- a/Samples/5_Domain_Specific/fluidsGLES/Makefile +++ b/Samples/5_Domain_Specific/fluidsGLES/Makefile @@ -313,9 +313,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 
86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/5_Domain_Specific/fluidsGLES/NsightEclipse.xml b/Samples/5_Domain_Specific/fluidsGLES/NsightEclipse.xml index e5feea24..afaeff7e 100644 --- a/Samples/5_Domain_Specific/fluidsGLES/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/fluidsGLES/NsightEclipse.xml @@ -3,19 +3,19 @@ fluidsGLES - cudaFree - cudaGraphicsMapResources - cudaFreeArray - cudaGraphicsGLRegisterBuffer - cudaGraphicsResourceGetMappedPointer - cudaDestroyTextureObject - cudaMallocPitch - cudaCreateTextureObject - cudaMalloc - cudaMallocArray - cudaGraphicsUnregisterResource cudaGraphicsUnmapResources cudaMemcpy + cudaMallocArray + cudaFreeArray + cudaFree + cudaMallocPitch + cudaGraphicsResourceGetMappedPointer + cudaGraphicsMapResources + cudaDestroyTextureObject + cudaCreateTextureObject + cudaGraphicsUnregisterResource + cudaMalloc + cudaGraphicsGLRegisterBuffer cudaGetDeviceProperties @@ -74,6 +74,7 @@ sm80 sm86 sm87 + sm90 arm diff --git a/Samples/5_Domain_Specific/fluidsGLES/README.md b/Samples/5_Domain_Specific/fluidsGLES/README.md index 406eb1bc..b2432dd0 100644 --- a/Samples/5_Domain_Specific/fluidsGLES/README.md +++ b/Samples/5_Domain_Specific/fluidsGLES/README.md @@ -10,7 +10,7 @@ Graphics Interop, CUFFT Library, Physically-Based Simulation ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 
](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaGraphicsMapResources, cudaFreeArray, cudaGraphicsGLRegisterBuffer, cudaGraphicsResourceGetMappedPointer, cudaDestroyTextureObject, cudaMallocPitch, cudaCreateTextureObject, cudaMalloc, cudaMallocArray, cudaGraphicsUnregisterResource, cudaGraphicsUnmapResources, cudaMemcpy, cudaGetDeviceProperties +cudaGraphicsUnmapResources, cudaMemcpy, cudaMallocArray, cudaFreeArray, cudaFree, cudaMallocPitch, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDestroyTextureObject, cudaCreateTextureObject, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer, cudaGetDeviceProperties ## Dependencies needed to build/run [X11](../../../README.md#x11), [GLES](../../../README.md#gles), [CUFFT](../../../README.md#cufft) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/5_Domain_Specific/fluidsGLES/findgleslib.mk b/Samples/5_Domain_Specific/fluidsGLES/findgleslib.mk index bcb335c1..6da2f078 100644 --- a/Samples/5_Domain_Specific/fluidsGLES/findgleslib.mk +++ b/Samples/5_Domain_Specific/fluidsGLES/findgleslib.mk @@ -60,6 +60,7 @@ ifeq ("$(TARGET_OS)","linux") RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -86,27 +87,17 @@ ifeq ("$(TARGET_OS)","linux") DFLT_PATH ?= /usr/lib endif endif - ifeq ("$(SUSE)","0") + + ifeq ("$(SUSE)","0") GLPATH ?= /usr/X11R6/lib64 GLLINK ?= -L/usr/X11R6/lib64 DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(FEDORA)","0") + else GLPATH ?= /usr/lib64/nvidia GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif - ifeq ("$(RHEL)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(CENTOS)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - + # find libGL, libGLU, libXi, EGLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libEGL.so -print 2>/dev/null) GLESLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLESv2.so -print 2>/dev/null) diff --git a/Samples/5_Domain_Specific/marchingCubes/Makefile b/Samples/5_Domain_Specific/marchingCubes/Makefile index 91180207..baf4f0f0 100644 --- a/Samples/5_Domain_Specific/marchingCubes/Makefile +++ b/Samples/5_Domain_Specific/marchingCubes/Makefile @@ -324,9 +324,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS 
?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/5_Domain_Specific/marchingCubes/NsightEclipse.xml b/Samples/5_Domain_Specific/marchingCubes/NsightEclipse.xml index ea03311e..0c23d020 100644 --- a/Samples/5_Domain_Specific/marchingCubes/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/marchingCubes/NsightEclipse.xml @@ -6,21 +6,21 @@ --std=c++14 - cudaFree - cudaGraphicsMapResources - cudaGraphicsUnregisterResource - cudaGraphicsGLRegisterBuffer - cudaGLUnregisterBufferObject - cudaGraphicsResourceGetMappedPointer cudaGLUnmapBufferObject - cudaDestroyTextureObject - cudaGLMapBufferObject - cudaCreateTextureObject - cudaMalloc - cudaCreateChannelDesc - cudaGLRegisterBufferObject cudaGraphicsUnmapResources + cudaCreateChannelDesc cudaMemcpy + cudaFree + cudaGLMapBufferObject + cudaGraphicsResourceGetMappedPointer + cudaGraphicsMapResources + cudaDestroyTextureObject + cudaGLUnregisterBufferObject + cudaCreateTextureObject + cudaGLRegisterBufferObject + cudaGraphicsUnregisterResource + cudaMalloc + cudaGraphicsGLRegisterBuffer whole @@ -92,6 +92,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/5_Domain_Specific/marchingCubes/README.md b/Samples/5_Domain_Specific/marchingCubes/README.md index 683f4f6b..0c2ed9e1 100644 --- a/Samples/5_Domain_Specific/marchingCubes/README.md +++ b/Samples/5_Domain_Specific/marchingCubes/README.md @@ -10,7 +10,7 @@ OpenGL Graphics Interop, Vertex Buffers, 3D Graphics, Physically Based Simulatio ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 
](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaGraphicsMapResources, cudaGraphicsUnregisterResource, cudaGraphicsGLRegisterBuffer, cudaGLUnregisterBufferObject, cudaGraphicsResourceGetMappedPointer, cudaGLUnmapBufferObject, cudaDestroyTextureObject, cudaGLMapBufferObject, cudaCreateTextureObject, cudaMalloc, cudaCreateChannelDesc, cudaGLRegisterBufferObject, cudaGraphicsUnmapResources, cudaMemcpy +cudaGLUnmapBufferObject, cudaGraphicsUnmapResources, cudaCreateChannelDesc, cudaMemcpy, cudaFree, cudaGLMapBufferObject, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDestroyTextureObject, cudaGLUnregisterBufferObject, cudaCreateTextureObject, cudaGLRegisterBufferObject, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer ## Dependencies needed to build/run [X11](../../../README.md#x11), [GL](../../../README.md#gl) ## Prerequisites 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/5_Domain_Specific/marchingCubes/findgllib.mk b/Samples/5_Domain_Specific/marchingCubes/findgllib.mk index f0a5c551..998fcf0f 100644 --- a/Samples/5_Domain_Specific/marchingCubes/findgllib.mk +++ b/Samples/5_Domain_Specific/marchingCubes/findgllib.mk @@ -53,11 +53,12 @@ endif ifeq ("$(TARGET_OS)","linux") # $(info) >> findgllib.mk -> LINUX path <<<) # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside - UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) - FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) - RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) - CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) + UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) + FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) + RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) + CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) 
ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux") DFLT_PATH ?= /usr/lib endif endif + ifeq ("$(SUSE)","0") GLPATH ?= /usr/X11R6/lib64 GLLINK ?= -L/usr/X11R6/lib64 DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(FEDORA)","0") + else GLPATH ?= /usr/lib64/nvidia GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif - ifeq ("$(RHEL)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(CENTOS)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - + # find libGL, libGLU GLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so -print 2>/dev/null) GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null) diff --git a/Samples/5_Domain_Specific/marchingCubes/marchingCubes_vs2017.vcxproj b/Samples/5_Domain_Specific/marchingCubes/marchingCubes_vs2017.vcxproj index 9eb7601b..9d5e9d28 100644 --- a/Samples/5_Domain_Specific/marchingCubes/marchingCubes_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/marchingCubes/marchingCubes_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/marchingCubes.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -119,6 +119,6 @@ - + diff --git a/Samples/5_Domain_Specific/marchingCubes/marchingCubes_vs2019.vcxproj b/Samples/5_Domain_Specific/marchingCubes/marchingCubes_vs2019.vcxproj index e2edca52..9e370d0b 100644 --- a/Samples/5_Domain_Specific/marchingCubes/marchingCubes_vs2019.vcxproj +++ 
b/Samples/5_Domain_Specific/marchingCubes/marchingCubes_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/marchingCubes.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -115,6 +115,6 @@ - + diff --git a/Samples/5_Domain_Specific/marchingCubes/marchingCubes_vs2022.vcxproj b/Samples/5_Domain_Specific/marchingCubes/marchingCubes_vs2022.vcxproj index 284e00cb..ef1da880 100644 --- a/Samples/5_Domain_Specific/marchingCubes/marchingCubes_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/marchingCubes/marchingCubes_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/marchingCubes.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -115,6 +115,6 @@ - + diff --git a/Samples/5_Domain_Specific/nbody/Makefile b/Samples/5_Domain_Specific/nbody/Makefile index d1d2e614..f4e1df42 100644 --- a/Samples/5_Domain_Specific/nbody/Makefile +++ b/Samples/5_Domain_Specific/nbody/Makefile @@ -299,9 +299,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git 
a/Samples/5_Domain_Specific/nbody/NsightEclipse.xml b/Samples/5_Domain_Specific/nbody/NsightEclipse.xml index 293adc58..213de221 100644 --- a/Samples/5_Domain_Specific/nbody/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/nbody/NsightEclipse.xml @@ -6,24 +6,24 @@ -ftz=true - cudaMemcpyToSymbol - cudaGraphicsMapResources - cudaEventRecord - cudaStreamQuery - cudaEventCreate - cudaGraphicsResourceGetMappedPointer - cudaGetDeviceCount - cudaEventElapsedTime - cudaDeviceSynchronize - cudaEventSynchronize - cudaGraphicsResourceSetMapFlags - cudaSetDeviceFlags - cudaEventDestroy - cudaDeviceCanAccessPeer - cudaSetDevice cudaGraphicsUnmapResources + cudaSetDeviceFlags + cudaGraphicsResourceSetMapFlags + cudaGraphicsResourceGetMappedPointer + cudaGraphicsMapResources + cudaSetDevice + cudaEventSynchronize + cudaGetDeviceCount cudaGetDeviceProperties + cudaDeviceSynchronize + cudaEventRecord cudaGetDevice + cudaMemcpyToSymbol + cudaStreamQuery + cudaEventDestroy + cudaEventElapsedTime + cudaDeviceCanAccessPeer + cudaEventCreate " to the command line will allow users to set # of bodies for simulation. Adding “-numdevices=” to the command line option will cause the sample to use N devices (if available) for simulation. In this mode, the position and velocity data for all bodies are read from system memory using “zero copy” rather than from device memory. 
For a small number of devices (4 or fewer) and a large enough number of bodies, bandwidth is not a bottleneck so we can achieve strong scaling across these devices.]]> whole @@ -91,6 +91,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/5_Domain_Specific/nbody/README.md b/Samples/5_Domain_Specific/nbody/README.md index 40d122a7..837296c7 100644 --- a/Samples/5_Domain_Specific/nbody/README.md +++ b/Samples/5_Domain_Specific/nbody/README.md @@ -10,7 +10,7 @@ Graphics Interop, Data Parallel Algorithms, Physically-Based Simulation ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, armv7l ## CUDA APIs involved ### 
[CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemcpyToSymbol, cudaGraphicsMapResources, cudaEventRecord, cudaStreamQuery, cudaEventCreate, cudaGraphicsResourceGetMappedPointer, cudaGetDeviceCount, cudaEventElapsedTime, cudaDeviceSynchronize, cudaEventSynchronize, cudaGraphicsResourceSetMapFlags, cudaSetDeviceFlags, cudaEventDestroy, cudaDeviceCanAccessPeer, cudaSetDevice, cudaGraphicsUnmapResources, cudaGetDeviceProperties, cudaGetDevice +cudaGraphicsUnmapResources, cudaSetDeviceFlags, cudaGraphicsResourceSetMapFlags, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaSetDevice, cudaEventSynchronize, cudaGetDeviceCount, cudaGetDeviceProperties, cudaDeviceSynchronize, cudaEventRecord, cudaGetDevice, cudaMemcpyToSymbol, cudaStreamQuery, cudaEventDestroy, cudaEventElapsedTime, cudaDeviceCanAccessPeer, cudaEventCreate ## Dependencies needed to build/run [X11](../../../README.md#x11), [GL](../../../README.md#gl) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/5_Domain_Specific/nbody/findgllib.mk b/Samples/5_Domain_Specific/nbody/findgllib.mk index f0a5c551..998fcf0f 100644 --- a/Samples/5_Domain_Specific/nbody/findgllib.mk +++ b/Samples/5_Domain_Specific/nbody/findgllib.mk @@ -53,11 +53,12 @@ endif ifeq ("$(TARGET_OS)","linux") # $(info) >> findgllib.mk -> LINUX path <<<) # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside - UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) - FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) 
- RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) - CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) + UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) + FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) + RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) + CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux") DFLT_PATH ?= /usr/lib endif endif + ifeq ("$(SUSE)","0") GLPATH ?= /usr/X11R6/lib64 GLLINK ?= -L/usr/X11R6/lib64 DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(FEDORA)","0") + else GLPATH ?= /usr/lib64/nvidia GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif - ifeq ("$(RHEL)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(CENTOS)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - + # find libGL, libGLU GLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so -print 2>/dev/null) GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null) diff --git a/Samples/5_Domain_Specific/nbody/nbody_vs2017.vcxproj b/Samples/5_Domain_Specific/nbody/nbody_vs2017.vcxproj index 1406b31b..99e5a6bf 100644 --- a/Samples/5_Domain_Specific/nbody/nbody_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/nbody/nbody_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/nbody.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -125,6 +125,6 @@ - + diff --git a/Samples/5_Domain_Specific/nbody/nbody_vs2019.vcxproj b/Samples/5_Domain_Specific/nbody/nbody_vs2019.vcxproj index ae04b090..7662e500 100644 --- a/Samples/5_Domain_Specific/nbody/nbody_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/nbody/nbody_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/nbody.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -121,6 +121,6 @@ - + diff --git a/Samples/5_Domain_Specific/nbody/nbody_vs2022.vcxproj b/Samples/5_Domain_Specific/nbody/nbody_vs2022.vcxproj index c1495525..1c2c9ecf 100644 --- a/Samples/5_Domain_Specific/nbody/nbody_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/nbody/nbody_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/nbody.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -121,6 +121,6 @@ - + diff --git a/Samples/5_Domain_Specific/nbody_opengles/Makefile b/Samples/5_Domain_Specific/nbody_opengles/Makefile index 5b56f105..ef0b753b 100644 --- 
a/Samples/5_Domain_Specific/nbody_opengles/Makefile +++ b/Samples/5_Domain_Specific/nbody_opengles/Makefile @@ -313,9 +313,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/5_Domain_Specific/nbody_opengles/NsightEclipse.xml b/Samples/5_Domain_Specific/nbody_opengles/NsightEclipse.xml index e7e56c9a..4ffba110 100644 --- a/Samples/5_Domain_Specific/nbody_opengles/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/nbody_opengles/NsightEclipse.xml @@ -6,23 +6,23 @@ -ftz=true - cudaMemcpyToSymbol - cudaGraphicsMapResources - cudaEventRecord - cudaStreamQuery - cudaEventCreate - cudaGraphicsResourceGetMappedPointer - cudaGetDeviceCount - cudaEventElapsedTime - cudaDeviceSynchronize - cudaEventSynchronize - cudaGraphicsResourceSetMapFlags - cudaSetDeviceFlags - cudaEventDestroy - cudaSetDevice cudaGraphicsUnmapResources + cudaSetDeviceFlags + cudaGraphicsResourceSetMapFlags + cudaGraphicsResourceGetMappedPointer + cudaGraphicsMapResources + cudaSetDevice + cudaEventSynchronize cudaGetDeviceProperties + cudaDeviceSynchronize + cudaEventRecord cudaGetDevice + cudaMemcpyToSymbol + cudaStreamQuery + cudaEventDestroy + cudaEventElapsedTime + cudaGetDeviceCount + cudaEventCreate whole @@ -81,6 +81,7 @@ sm80 sm86 sm87 + sm90 arm diff --git a/Samples/5_Domain_Specific/nbody_opengles/README.md b/Samples/5_Domain_Specific/nbody_opengles/README.md index 3c4772d2..a7911e09 100644 --- a/Samples/5_Domain_Specific/nbody_opengles/README.md +++ b/Samples/5_Domain_Specific/nbody_opengles/README.md @@ -10,7 +10,7 @@ Graphics Interop, Data Parallel Algorithms, Physically-Based Simulation ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 
](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemcpyToSymbol, cudaGraphicsMapResources, cudaEventRecord, cudaStreamQuery, cudaEventCreate, cudaGraphicsResourceGetMappedPointer, cudaGetDeviceCount, cudaEventElapsedTime, cudaDeviceSynchronize, cudaEventSynchronize, cudaGraphicsResourceSetMapFlags, cudaSetDeviceFlags, cudaEventDestroy, cudaSetDevice, cudaGraphicsUnmapResources, cudaGetDeviceProperties, cudaGetDevice +cudaGraphicsUnmapResources, cudaSetDeviceFlags, cudaGraphicsResourceSetMapFlags, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaSetDevice, cudaEventSynchronize, cudaGetDeviceProperties, cudaDeviceSynchronize, 
cudaEventRecord, cudaGetDevice, cudaMemcpyToSymbol, cudaStreamQuery, cudaEventDestroy, cudaEventElapsedTime, cudaGetDeviceCount, cudaEventCreate ## Dependencies needed to build/run [X11](../../../README.md#x11), [GLES](../../../README.md#gles) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/5_Domain_Specific/nbody_opengles/findgleslib.mk b/Samples/5_Domain_Specific/nbody_opengles/findgleslib.mk index bcb335c1..6da2f078 100644 --- a/Samples/5_Domain_Specific/nbody_opengles/findgleslib.mk +++ b/Samples/5_Domain_Specific/nbody_opengles/findgleslib.mk @@ -60,6 +60,7 @@ ifeq ("$(TARGET_OS)","linux") RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) 
ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -86,27 +87,17 @@ ifeq ("$(TARGET_OS)","linux") DFLT_PATH ?= /usr/lib endif endif - ifeq ("$(SUSE)","0") + + ifeq ("$(SUSE)","0") GLPATH ?= /usr/X11R6/lib64 GLLINK ?= -L/usr/X11R6/lib64 DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(FEDORA)","0") + else GLPATH ?= /usr/lib64/nvidia GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif - ifeq ("$(RHEL)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(CENTOS)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - + # find libGL, libGLU, libXi, EGLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libEGL.so -print 2>/dev/null) GLESLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLESv2.so -print 2>/dev/null) diff --git a/Samples/5_Domain_Specific/nbody_screen/Makefile b/Samples/5_Domain_Specific/nbody_screen/Makefile index 60558f49..b54e1795 100644 --- a/Samples/5_Domain_Specific/nbody_screen/Makefile +++ b/Samples/5_Domain_Specific/nbody_screen/Makefile @@ -320,9 +320,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/5_Domain_Specific/nbody_screen/NsightEclipse.xml b/Samples/5_Domain_Specific/nbody_screen/NsightEclipse.xml index 1c3275ce..074a2e5b 100644 --- a/Samples/5_Domain_Specific/nbody_screen/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/nbody_screen/NsightEclipse.xml @@ -6,23 +6,23 @@ -ftz=true - cudaMemcpyToSymbol - cudaGraphicsMapResources - cudaEventRecord - cudaStreamQuery - cudaEventCreate - cudaGraphicsResourceGetMappedPointer - cudaGetDeviceCount - cudaEventElapsedTime - cudaDeviceSynchronize - cudaEventSynchronize - 
cudaGraphicsResourceSetMapFlags - cudaSetDeviceFlags - cudaEventDestroy - cudaSetDevice cudaGraphicsUnmapResources + cudaSetDeviceFlags + cudaGraphicsResourceSetMapFlags + cudaGraphicsResourceGetMappedPointer + cudaGraphicsMapResources + cudaSetDevice + cudaEventSynchronize cudaGetDeviceProperties + cudaDeviceSynchronize + cudaEventRecord cudaGetDevice + cudaMemcpyToSymbol + cudaStreamQuery + cudaEventDestroy + cudaEventElapsedTime + cudaGetDeviceCount + cudaEventCreate whole @@ -79,6 +79,7 @@ sm80 sm86 sm87 + sm90 qnx diff --git a/Samples/5_Domain_Specific/nbody_screen/README.md b/Samples/5_Domain_Specific/nbody_screen/README.md index 73787bdd..54b9df1c 100644 --- a/Samples/5_Domain_Specific/nbody_screen/README.md +++ b/Samples/5_Domain_Specific/nbody_screen/README.md @@ -10,7 +10,7 @@ Graphics Interop, Data Parallel Algorithms, Physically-Based Simulation ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 
](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ aarch64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemcpyToSymbol, cudaGraphicsMapResources, cudaEventRecord, cudaStreamQuery, cudaEventCreate, cudaGraphicsResourceGetMappedPointer, cudaGetDeviceCount, cudaEventElapsedTime, cudaDeviceSynchronize, cudaEventSynchronize, cudaGraphicsResourceSetMapFlags, cudaSetDeviceFlags, cudaEventDestroy, cudaSetDevice, cudaGraphicsUnmapResources, cudaGetDeviceProperties, cudaGetDevice +cudaGraphicsUnmapResources, cudaSetDeviceFlags, cudaGraphicsResourceSetMapFlags, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaSetDevice, cudaEventSynchronize, cudaGetDeviceProperties, cudaDeviceSynchronize, cudaEventRecord, cudaGetDevice, cudaMemcpyToSymbol, cudaStreamQuery, cudaEventDestroy, cudaEventElapsedTime, cudaGetDeviceCount, cudaEventCreate ## Dependencies needed to build/run [screen](../../../README.md#screen), [GLES](../../../README.md#gles) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/5_Domain_Specific/nbody_screen/findgleslib.mk b/Samples/5_Domain_Specific/nbody_screen/findgleslib.mk index bcb335c1..6da2f078 100644 --- a/Samples/5_Domain_Specific/nbody_screen/findgleslib.mk +++ b/Samples/5_Domain_Specific/nbody_screen/findgleslib.mk @@ -60,6 +60,7 @@ ifeq ("$(TARGET_OS)","linux") RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -86,27 +87,17 @@ ifeq ("$(TARGET_OS)","linux") DFLT_PATH ?= /usr/lib endif endif - ifeq ("$(SUSE)","0") + + ifeq ("$(SUSE)","0") GLPATH ?= /usr/X11R6/lib64 GLLINK ?= -L/usr/X11R6/lib64 DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(FEDORA)","0") + else GLPATH ?= /usr/lib64/nvidia GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif - ifeq ("$(RHEL)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(CENTOS)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - + # find libGL, libGLU, libXi, EGLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libEGL.so -print 2>/dev/null) GLESLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLESv2.so -print 2>/dev/null) diff --git a/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/Makefile b/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/Makefile index 6933444e..37afba15 100644 --- a/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/Makefile +++ b/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 
+SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/NsightEclipse.xml b/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/NsightEclipse.xml index 1fc38a6c..57679e4c 100644 --- a/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/NsightEclipse.xml @@ -3,30 +3,30 @@ p2pBandwidthLatencyTest - cudaDeviceEnablePeerAccess - cudaOccupancyMaxPotentialBlockSize - cudaStreamCreateWithFlags - cudaDeviceCanAccessPeer - cudaStreamDestroy - cudaHostAlloc - cudaEventCreate - cudaMalloc - cudaEventDestroy cudaSetDevice - cudaMemcpyPeerAsync - cudaGetDeviceProperties + cudaEventDestroy + cudaOccupancyMaxPotentialBlockSize cudaCheckError - cudaGetDeviceCount - cudaEventElapsedTime - cudaGetLastError - cudaDeviceDisablePeerAccess - cudaStreamSynchronize - cudaGetErrorString - cudaStreamWaitEvent - cudaMemset - cudaFree - cudaEventRecord cudaFreeHost + cudaGetDeviceCount + cudaDeviceCanAccessPeer + cudaStreamCreateWithFlags + cudaStreamDestroy + cudaGetLastError + cudaMemset + cudaStreamWaitEvent + cudaEventElapsedTime + cudaEventCreate + cudaHostAlloc + cudaFree + cudaGetErrorString + cudaMemcpyPeerAsync + cudaDeviceDisablePeerAccess + cudaEventRecord + cudaStreamSynchronize + cudaDeviceEnablePeerAccess + cudaMalloc + cudaGetDeviceProperties whole @@ -71,6 +71,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/README.md b/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/README.md index 72e34fb1..1df07a63 100644 --- a/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/README.md +++ b/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/README.md @@ -10,7 +10,7 @@ Performance Strategies, Asynchronous Data Transfers, Unified Virtual Address Spa ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) 
[SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaDeviceEnablePeerAccess, cudaOccupancyMaxPotentialBlockSize, cudaStreamCreateWithFlags, cudaDeviceCanAccessPeer, cudaStreamDestroy, cudaHostAlloc, cudaEventCreate, cudaMalloc, cudaEventDestroy, cudaSetDevice, cudaMemcpyPeerAsync, cudaGetDeviceProperties, cudaCheckError, cudaGetDeviceCount, cudaEventElapsedTime, cudaGetLastError, cudaDeviceDisablePeerAccess, cudaStreamSynchronize, cudaGetErrorString, cudaStreamWaitEvent, cudaMemset, cudaFree, cudaEventRecord, cudaFreeHost +cudaSetDevice, cudaEventDestroy, 
cudaOccupancyMaxPotentialBlockSize, cudaCheckError, cudaFreeHost, cudaGetDeviceCount, cudaDeviceCanAccessPeer, cudaStreamCreateWithFlags, cudaStreamDestroy, cudaGetLastError, cudaMemset, cudaStreamWaitEvent, cudaEventElapsedTime, cudaEventCreate, cudaHostAlloc, cudaFree, cudaGetErrorString, cudaMemcpyPeerAsync, cudaDeviceDisablePeerAccess, cudaEventRecord, cudaStreamSynchronize, cudaDeviceEnablePeerAccess, cudaMalloc, cudaGetDeviceProperties ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/p2pBandwidthLatencyTest_vs2017.vcxproj b/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/p2pBandwidthLatencyTest_vs2017.vcxproj index 545fa82c..cbed6fc1 100644 --- a/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/p2pBandwidthLatencyTest_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/p2pBandwidthLatencyTest_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/p2pBandwidthLatencyTest.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/p2pBandwidthLatencyTest_vs2019.vcxproj b/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/p2pBandwidthLatencyTest_vs2019.vcxproj index f8e09274..43fbfc44 100644 --- a/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/p2pBandwidthLatencyTest_vs2019.vcxproj +++ 
b/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/p2pBandwidthLatencyTest_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/p2pBandwidthLatencyTest.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/p2pBandwidthLatencyTest_vs2022.vcxproj b/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/p2pBandwidthLatencyTest_vs2022.vcxproj index 6e35634d..28486552 100644 --- a/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/p2pBandwidthLatencyTest_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/p2pBandwidthLatencyTest_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/p2pBandwidthLatencyTest.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/5_Domain_Specific/postProcessGL/Makefile b/Samples/5_Domain_Specific/postProcessGL/Makefile index 03790fc8..c6f18f8d 100644 --- a/Samples/5_Domain_Specific/postProcessGL/Makefile +++ b/Samples/5_Domain_Specific/postProcessGL/Makefile @@ -299,9 +299,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS 
?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/5_Domain_Specific/postProcessGL/NsightEclipse.xml b/Samples/5_Domain_Specific/postProcessGL/NsightEclipse.xml index c477aeef..756864f0 100644 --- a/Samples/5_Domain_Specific/postProcessGL/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/postProcessGL/NsightEclipse.xml @@ -3,22 +3,22 @@ postProcessGL - cudaFree - cudaGraphicsMapResources - cudaGraphicsUnregisterResource - cudaMemcpyToArray - cudaGraphicsGLRegisterBuffer cudaHostAlloc - cudaGraphicsResourceGetMappedPointer - cudaProcess - cudaDeviceSynchronize - cudaDestroyTextureObject - cudaCreateTextureObject - cudaGraphicsSubResourceGetMappedArray - cudaGetChannelDesc - cudaMalloc - cudaGraphicsGLRegisterImage cudaGraphicsUnmapResources + cudaMalloc + cudaFree + cudaGetChannelDesc + cudaGraphicsResourceGetMappedPointer + cudaGraphicsMapResources + cudaDestroyTextureObject + cudaMemcpyToArray + cudaDeviceSynchronize + cudaCreateTextureObject + cudaProcess + cudaGraphicsUnregisterResource + cudaGraphicsSubResourceGetMappedArray + cudaGraphicsGLRegisterBuffer + cudaGraphicsGLRegisterImage whole @@ -85,6 +85,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/5_Domain_Specific/postProcessGL/README.md b/Samples/5_Domain_Specific/postProcessGL/README.md index 53aa76e7..821e00ff 100644 --- a/Samples/5_Domain_Specific/postProcessGL/README.md +++ b/Samples/5_Domain_Specific/postProcessGL/README.md @@ -10,7 +10,7 @@ Graphics Interop, Image Processing ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 
](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaGraphicsMapResources, cudaGraphicsUnregisterResource, cudaMemcpyToArray, cudaGraphicsGLRegisterBuffer, cudaHostAlloc, cudaGraphicsResourceGetMappedPointer, cudaProcess, cudaDeviceSynchronize, cudaDestroyTextureObject, cudaCreateTextureObject, cudaGraphicsSubResourceGetMappedArray, cudaGetChannelDesc, cudaMalloc, cudaGraphicsGLRegisterImage, cudaGraphicsUnmapResources +cudaHostAlloc, cudaGraphicsUnmapResources, cudaMalloc, cudaFree, cudaGetChannelDesc, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDestroyTextureObject, cudaMemcpyToArray, cudaDeviceSynchronize, cudaCreateTextureObject, cudaProcess, cudaGraphicsUnregisterResource, cudaGraphicsSubResourceGetMappedArray, cudaGraphicsGLRegisterBuffer, cudaGraphicsGLRegisterImage ## Dependencies needed to build/run [X11](../../../README.md#x11), [GL](../../../README.md#gl) ## Prerequisites -Download and 
install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/5_Domain_Specific/postProcessGL/findgllib.mk b/Samples/5_Domain_Specific/postProcessGL/findgllib.mk index f0a5c551..998fcf0f 100644 --- a/Samples/5_Domain_Specific/postProcessGL/findgllib.mk +++ b/Samples/5_Domain_Specific/postProcessGL/findgllib.mk @@ -53,11 +53,12 @@ endif ifeq ("$(TARGET_OS)","linux") # $(info) >> findgllib.mk -> LINUX path <<<) # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside - UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) - FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) - RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) - CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) + UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) + FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) + RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) + CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) 
ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux") DFLT_PATH ?= /usr/lib endif endif + ifeq ("$(SUSE)","0") GLPATH ?= /usr/X11R6/lib64 GLLINK ?= -L/usr/X11R6/lib64 DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(FEDORA)","0") + else GLPATH ?= /usr/lib64/nvidia GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif - ifeq ("$(RHEL)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(CENTOS)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - + # find libGL, libGLU GLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so -print 2>/dev/null) GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null) diff --git a/Samples/5_Domain_Specific/postProcessGL/postProcessGL_vs2017.vcxproj b/Samples/5_Domain_Specific/postProcessGL/postProcessGL_vs2017.vcxproj index 015eb83d..23cf0406 100644 --- a/Samples/5_Domain_Specific/postProcessGL/postProcessGL_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/postProcessGL/postProcessGL_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/postProcessGL.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -118,6 +118,6 @@ - + diff --git a/Samples/5_Domain_Specific/postProcessGL/postProcessGL_vs2019.vcxproj b/Samples/5_Domain_Specific/postProcessGL/postProcessGL_vs2019.vcxproj index 7ac06e89..08c28e4d 100644 --- a/Samples/5_Domain_Specific/postProcessGL/postProcessGL_vs2019.vcxproj +++ 
b/Samples/5_Domain_Specific/postProcessGL/postProcessGL_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/postProcessGL.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -114,6 +114,6 @@ - + diff --git a/Samples/5_Domain_Specific/postProcessGL/postProcessGL_vs2022.vcxproj b/Samples/5_Domain_Specific/postProcessGL/postProcessGL_vs2022.vcxproj index b549c092..b70819ba 100644 --- a/Samples/5_Domain_Specific/postProcessGL/postProcessGL_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/postProcessGL/postProcessGL_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/postProcessGL.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -114,6 +114,6 @@ - + diff --git a/Samples/5_Domain_Specific/quasirandomGenerator/Makefile b/Samples/5_Domain_Specific/quasirandomGenerator/Makefile index 3bc715cb..04628cd3 100644 --- a/Samples/5_Domain_Specific/quasirandomGenerator/Makefile +++ b/Samples/5_Domain_Specific/quasirandomGenerator/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq 
($(SMS),) diff --git a/Samples/5_Domain_Specific/quasirandomGenerator/NsightEclipse.xml b/Samples/5_Domain_Specific/quasirandomGenerator/NsightEclipse.xml index b66fab73..c35eaa11 100644 --- a/Samples/5_Domain_Specific/quasirandomGenerator/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/quasirandomGenerator/NsightEclipse.xml @@ -3,12 +3,12 @@ quasirandomGenerator - cudaMemcpyToSymbol - cudaMemset + cudaMemcpy cudaFree cudaDeviceSynchronize + cudaMemset + cudaMemcpyToSymbol cudaMalloc - cudaMemcpy whole @@ -49,6 +49,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/5_Domain_Specific/quasirandomGenerator/README.md b/Samples/5_Domain_Specific/quasirandomGenerator/README.md index 8abc9977..54afdbba 100644 --- a/Samples/5_Domain_Specific/quasirandomGenerator/README.md +++ b/Samples/5_Domain_Specific/quasirandomGenerator/README.md @@ -10,7 +10,7 @@ Computational Finance ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 
](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemcpyToSymbol, cudaMemset, cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaMemcpy +cudaMemcpy, cudaFree, cudaDeviceSynchronize, cudaMemset, cudaMemcpyToSymbol, cudaMalloc ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator_vs2017.vcxproj b/Samples/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator_vs2017.vcxproj index c23b1172..be6fcdae 100644 --- a/Samples/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/quasirandomGenerator.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -109,6 +109,6 @@ - + diff --git a/Samples/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator_vs2019.vcxproj 
b/Samples/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator_vs2019.vcxproj index 4d1c0c50..a7083425 100644 --- a/Samples/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/quasirandomGenerator.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -105,6 +105,6 @@ - + diff --git a/Samples/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator_vs2022.vcxproj b/Samples/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator_vs2022.vcxproj index dbec91a6..0861b23a 100644 --- a/Samples/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/quasirandomGenerator.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -105,6 +105,6 @@ - + diff --git a/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/README.md b/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/README.md index 3e20ffa0..c91d1a24 100644 --- a/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/README.md +++ 
b/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/README.md @@ -10,7 +10,7 @@ Computational Finance, Runtime Compilation ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64 ## CUDA APIs involved ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html) -cuMemFree, cuMemcpyDtoH, cuMemAlloc +cuMemcpyDtoH, cuMemAlloc, cuMemFree ## Dependencies needed to build/run [NVRTC](../../../README.md#nvrtc) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_nvrtc_vs2017.vcxproj b/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_nvrtc_vs2017.vcxproj index b07a5e07..3faf8550 100644 --- a/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_nvrtc_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_nvrtc_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -110,6 +110,6 @@ - + diff --git a/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_nvrtc_vs2019.vcxproj b/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_nvrtc_vs2019.vcxproj index e11918b8..cb8893b8 100644 --- a/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_nvrtc_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_nvrtc_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -106,6 +106,6 @@ - + diff --git a/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_nvrtc_vs2022.vcxproj b/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_nvrtc_vs2022.vcxproj index 0bdb2adb..9dc93ac6 100644 --- a/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_nvrtc_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_nvrtc_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -106,6 +106,6 @@ - + diff --git a/Samples/5_Domain_Specific/recursiveGaussian/Makefile b/Samples/5_Domain_Specific/recursiveGaussian/Makefile index 011149f3..f228dea7 100644 --- a/Samples/5_Domain_Specific/recursiveGaussian/Makefile +++ b/Samples/5_Domain_Specific/recursiveGaussian/Makefile @@ -299,9 +299,9 @@ endif # Gencode arguments 
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/5_Domain_Specific/recursiveGaussian/NsightEclipse.xml b/Samples/5_Domain_Specific/recursiveGaussian/NsightEclipse.xml index 00c1b1b7..624f69c5 100644 --- a/Samples/5_Domain_Specific/recursiveGaussian/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/recursiveGaussian/NsightEclipse.xml @@ -3,17 +3,17 @@ recursiveGaussian - cudaFree - cudaGraphicsMapResources - cudaGraphicsGLRegisterBuffer - cudaGraphicsResourceGetMappedPointer - cudaDeviceSynchronize - cudaMalloc - cudaGraphicsUnregisterResource cudaGraphicsUnmapResources cudaMemcpy - cudaGetDeviceProperties + cudaFree + cudaGraphicsResourceGetMappedPointer + cudaGraphicsMapResources + cudaDeviceSynchronize cudaGetDevice + cudaGraphicsUnregisterResource + cudaMalloc + cudaGraphicsGLRegisterBuffer + cudaGetDeviceProperties whole @@ -84,6 +84,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/5_Domain_Specific/recursiveGaussian/README.md b/Samples/5_Domain_Specific/recursiveGaussian/README.md index 50d33152..9e1475c4 100644 --- a/Samples/5_Domain_Specific/recursiveGaussian/README.md +++ b/Samples/5_Domain_Specific/recursiveGaussian/README.md @@ -10,7 +10,7 @@ Graphics Interop, Image Processing ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 
](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaGraphicsMapResources, cudaGraphicsGLRegisterBuffer, cudaGraphicsResourceGetMappedPointer, cudaDeviceSynchronize, cudaMalloc, cudaGraphicsUnregisterResource, cudaGraphicsUnmapResources, cudaMemcpy, cudaGetDeviceProperties, cudaGetDevice +cudaGraphicsUnmapResources, cudaMemcpy, cudaFree, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDeviceSynchronize, cudaGetDevice, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer, cudaGetDeviceProperties ## Dependencies needed to build/run [X11](../../../README.md#x11), [GL](../../../README.md#gl) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/5_Domain_Specific/recursiveGaussian/findgllib.mk b/Samples/5_Domain_Specific/recursiveGaussian/findgllib.mk index f0a5c551..998fcf0f 100644 --- a/Samples/5_Domain_Specific/recursiveGaussian/findgllib.mk +++ b/Samples/5_Domain_Specific/recursiveGaussian/findgllib.mk @@ -53,11 +53,12 @@ endif ifeq ("$(TARGET_OS)","linux") # $(info) >> findgllib.mk -> LINUX path <<<) # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside - UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) - FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) - RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) - CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) + UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) + FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) + RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) + CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) 
ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux") DFLT_PATH ?= /usr/lib endif endif + ifeq ("$(SUSE)","0") GLPATH ?= /usr/X11R6/lib64 GLLINK ?= -L/usr/X11R6/lib64 DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(FEDORA)","0") + else GLPATH ?= /usr/lib64/nvidia GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif - ifeq ("$(RHEL)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(CENTOS)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - + # find libGL, libGLU GLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so -print 2>/dev/null) GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null) diff --git a/Samples/5_Domain_Specific/recursiveGaussian/recursiveGaussian_vs2017.vcxproj b/Samples/5_Domain_Specific/recursiveGaussian/recursiveGaussian_vs2017.vcxproj index e6c68d03..f706030a 100644 --- a/Samples/5_Domain_Specific/recursiveGaussian/recursiveGaussian_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/recursiveGaussian/recursiveGaussian_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/recursiveGaussian.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -118,6 +118,6 @@ - + diff --git a/Samples/5_Domain_Specific/recursiveGaussian/recursiveGaussian_vs2019.vcxproj b/Samples/5_Domain_Specific/recursiveGaussian/recursiveGaussian_vs2019.vcxproj index a0de3445..72663de2 100644 --- 
a/Samples/5_Domain_Specific/recursiveGaussian/recursiveGaussian_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/recursiveGaussian/recursiveGaussian_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/recursiveGaussian.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -114,6 +114,6 @@ - + diff --git a/Samples/5_Domain_Specific/recursiveGaussian/recursiveGaussian_vs2022.vcxproj b/Samples/5_Domain_Specific/recursiveGaussian/recursiveGaussian_vs2022.vcxproj index 44739af7..947a471c 100644 --- a/Samples/5_Domain_Specific/recursiveGaussian/recursiveGaussian_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/recursiveGaussian/recursiveGaussian_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/recursiveGaussian.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -114,6 +114,6 @@ - + diff --git a/Samples/5_Domain_Specific/simpleD3D10/README.md b/Samples/5_Domain_Specific/simpleD3D10/README.md index cf446dc4..a9d7cde7 100644 --- a/Samples/5_Domain_Specific/simpleD3D10/README.md +++ b/Samples/5_Domain_Specific/simpleD3D10/README.md @@ -10,7 +10,7 @@ Graphics Interop, 3D Graphics ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 
5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaGraphicsMapResources, cudaGraphicsUnregisterResource, cudaGraphicsResourceGetMappedPointer, cudaGetDeviceCount, cudaGetLastError, cudaGraphicsUnmapResources, cudaGetErrorString, cudaGetDeviceProperties +cudaGraphicsUnmapResources, cudaGetErrorString, cudaGraphicsResourceGetMappedPointer, cudaGetLastError, cudaGraphicsMapResources, cudaGetDeviceCount, cudaGraphicsUnregisterResource, cudaGetDeviceProperties ## Dependencies needed to build/run [DirectX](../../../README.md#directx) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your 
corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/5_Domain_Specific/simpleD3D10/simpleD3D10_vs2017.vcxproj b/Samples/5_Domain_Specific/simpleD3D10/simpleD3D10_vs2017.vcxproj index b5bbaf2d..4dadd193 100644 --- a/Samples/5_Domain_Specific/simpleD3D10/simpleD3D10_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/simpleD3D10/simpleD3D10_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleD3D10.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -109,6 +109,6 @@ - + diff --git a/Samples/5_Domain_Specific/simpleD3D10/simpleD3D10_vs2019.vcxproj b/Samples/5_Domain_Specific/simpleD3D10/simpleD3D10_vs2019.vcxproj index 3b3f6b28..1dca8e14 100644 --- a/Samples/5_Domain_Specific/simpleD3D10/simpleD3D10_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/simpleD3D10/simpleD3D10_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleD3D10.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -105,6 +105,6 @@ - + diff --git a/Samples/5_Domain_Specific/simpleD3D10/simpleD3D10_vs2022.vcxproj 
b/Samples/5_Domain_Specific/simpleD3D10/simpleD3D10_vs2022.vcxproj index 738f55bc..630c0ea1 100644 --- a/Samples/5_Domain_Specific/simpleD3D10/simpleD3D10_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/simpleD3D10/simpleD3D10_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleD3D10.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -105,6 +105,6 @@ - + diff --git a/Samples/5_Domain_Specific/simpleD3D10RenderTarget/README.md b/Samples/5_Domain_Specific/simpleD3D10RenderTarget/README.md index a8b68908..49077dc7 100644 --- a/Samples/5_Domain_Specific/simpleD3D10RenderTarget/README.md +++ b/Samples/5_Domain_Specific/simpleD3D10RenderTarget/README.md @@ -10,7 +10,7 @@ Graphics Interop, Texture ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 
](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaGraphicsMapResources, cudaGraphicsUnregisterResource, cudaGraphicsResourceGetMappedPointer, cudaGetDeviceCount, cudaMalloc, cudaGraphicsSubResourceGetMappedArray, cudaBindTextureToArray, cudaUnbindTexture, cudaGetLastError, cudaGraphicsUnmapResources, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties +cudaGraphicsUnmapResources, cudaMemcpy, cudaMalloc, cudaUnbindTexture, cudaGetErrorString, cudaGraphicsResourceGetMappedPointer, cudaGetLastError, cudaGraphicsMapResources, cudaGetDeviceCount, cudaGraphicsUnregisterResource, cudaGraphicsSubResourceGetMappedArray, cudaBindTextureToArray, cudaGetDeviceProperties ## Dependencies needed to build/run [DirectX](../../../README.md#directx) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/5_Domain_Specific/simpleD3D10RenderTarget/simpleD3D10RenderTarget_vs2017.vcxproj b/Samples/5_Domain_Specific/simpleD3D10RenderTarget/simpleD3D10RenderTarget_vs2017.vcxproj index 19a3c9f2..08ad0dfc 100644 --- a/Samples/5_Domain_Specific/simpleD3D10RenderTarget/simpleD3D10RenderTarget_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/simpleD3D10RenderTarget/simpleD3D10RenderTarget_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleD3D10RenderTarget.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -109,6 +109,6 @@ - + diff --git a/Samples/5_Domain_Specific/simpleD3D10RenderTarget/simpleD3D10RenderTarget_vs2019.vcxproj b/Samples/5_Domain_Specific/simpleD3D10RenderTarget/simpleD3D10RenderTarget_vs2019.vcxproj index 335175e0..5478db87 100644 --- a/Samples/5_Domain_Specific/simpleD3D10RenderTarget/simpleD3D10RenderTarget_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/simpleD3D10RenderTarget/simpleD3D10RenderTarget_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleD3D10RenderTarget.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -105,6 +105,6 @@ - + diff --git a/Samples/5_Domain_Specific/simpleD3D10RenderTarget/simpleD3D10RenderTarget_vs2022.vcxproj 
b/Samples/5_Domain_Specific/simpleD3D10RenderTarget/simpleD3D10RenderTarget_vs2022.vcxproj index 75759d68..532e9d4d 100644 --- a/Samples/5_Domain_Specific/simpleD3D10RenderTarget/simpleD3D10RenderTarget_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/simpleD3D10RenderTarget/simpleD3D10RenderTarget_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleD3D10RenderTarget.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -105,6 +105,6 @@ - + diff --git a/Samples/5_Domain_Specific/simpleD3D10Texture/README.md b/Samples/5_Domain_Specific/simpleD3D10Texture/README.md index f96f34c7..c8f1a2ed 100644 --- a/Samples/5_Domain_Specific/simpleD3D10Texture/README.md +++ b/Samples/5_Domain_Specific/simpleD3D10Texture/README.md @@ -10,7 +10,7 @@ Graphics Interop, Texture ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 
](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemset, cudaFree, cudaGraphicsMapResources, cudaGraphicsUnregisterResource, cudaGetDeviceCount, cudaMallocPitch, cudaMalloc, cudaGraphicsSubResourceGetMappedArray, cudaGetLastError, cudaGraphicsUnmapResources, cudaGetErrorString, cudaGetDeviceProperties +cudaGraphicsUnmapResources, cudaMalloc, cudaMallocPitch, cudaGetErrorString, cudaFree, cudaGetLastError, cudaGraphicsMapResources, cudaGetDeviceCount, cudaMemset, cudaGraphicsUnregisterResource, cudaGraphicsSubResourceGetMappedArray, cudaGetDeviceProperties ## Dependencies needed to build/run [DirectX](../../../README.md#directx) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/5_Domain_Specific/simpleD3D10Texture/simpleD3D10Texture_vs2017.vcxproj b/Samples/5_Domain_Specific/simpleD3D10Texture/simpleD3D10Texture_vs2017.vcxproj index 2ad151f8..f5f7322f 100644 --- a/Samples/5_Domain_Specific/simpleD3D10Texture/simpleD3D10Texture_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/simpleD3D10Texture/simpleD3D10Texture_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleD3D10Texture.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -111,6 +111,6 @@ - + diff --git a/Samples/5_Domain_Specific/simpleD3D10Texture/simpleD3D10Texture_vs2019.vcxproj b/Samples/5_Domain_Specific/simpleD3D10Texture/simpleD3D10Texture_vs2019.vcxproj index 793e7da3..d0c81315 100644 --- a/Samples/5_Domain_Specific/simpleD3D10Texture/simpleD3D10Texture_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/simpleD3D10Texture/simpleD3D10Texture_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleD3D10Texture.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/5_Domain_Specific/simpleD3D10Texture/simpleD3D10Texture_vs2022.vcxproj b/Samples/5_Domain_Specific/simpleD3D10Texture/simpleD3D10Texture_vs2022.vcxproj index 
aa188b08..ebc26819 100644 --- a/Samples/5_Domain_Specific/simpleD3D10Texture/simpleD3D10Texture_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/simpleD3D10Texture/simpleD3D10Texture_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleD3D10Texture.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/5_Domain_Specific/simpleD3D11/README.md b/Samples/5_Domain_Specific/simpleD3D11/README.md index cfdb0c79..fadf5bf0 100644 --- a/Samples/5_Domain_Specific/simpleD3D11/README.md +++ b/Samples/5_Domain_Specific/simpleD3D11/README.md @@ -10,7 +10,7 @@ Graphics Interop, Image Processing ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 
](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaSignalExternalSemaphoresAsync, cudaImportVertexBuffer, cudaAcquireSync, cudaGetDeviceCount, cudaImportExternalSemaphore, cudaReleaseSync, cudaImportExternalMemory, cudaExternalMemoryGetMappedBuffer, cudaImportKeyedMutex, cudaStreamCreateWithFlags, cudaDestroyExternalSemaphore, cudaSetDevice, cudaWaitExternalSemaphoresAsync, cudaDestroyExternalMemory +cudaImportKeyedMutex, cudaExternalMemoryGetMappedBuffer, cudaStreamCreateWithFlags, cudaWaitExternalSemaphoresAsync, cudaImportExternalSemaphore, cudaFree, cudaImportVertexBuffer, cudaReleaseSync, cudaSetDevice, cudaSignalExternalSemaphoresAsync, cudaAcquireSync, cudaDestroyExternalMemory, cudaImportExternalMemory, cudaGetDeviceCount, cudaDestroyExternalSemaphore ## Dependencies needed to build/run [DirectX](../../../README.md#directx) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/5_Domain_Specific/simpleD3D11/simpleD3D11_vs2017.vcxproj b/Samples/5_Domain_Specific/simpleD3D11/simpleD3D11_vs2017.vcxproj index 9f0f0e9d..2a11df06 100644 --- a/Samples/5_Domain_Specific/simpleD3D11/simpleD3D11_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/simpleD3D11/simpleD3D11_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleD3D11.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -111,6 +111,6 @@ - + diff --git a/Samples/5_Domain_Specific/simpleD3D11/simpleD3D11_vs2019.vcxproj b/Samples/5_Domain_Specific/simpleD3D11/simpleD3D11_vs2019.vcxproj index af23ccc6..f3dbb2e0 100644 --- a/Samples/5_Domain_Specific/simpleD3D11/simpleD3D11_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/simpleD3D11/simpleD3D11_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleD3D11.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/5_Domain_Specific/simpleD3D11/simpleD3D11_vs2022.vcxproj b/Samples/5_Domain_Specific/simpleD3D11/simpleD3D11_vs2022.vcxproj index b29aa4bb..7338f410 100644 --- a/Samples/5_Domain_Specific/simpleD3D11/simpleD3D11_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/simpleD3D11/simpleD3D11_vs2022.vcxproj 
@@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleD3D11.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/5_Domain_Specific/simpleD3D11Texture/README.md b/Samples/5_Domain_Specific/simpleD3D11Texture/README.md index da8a3875..9c4cf954 100644 --- a/Samples/5_Domain_Specific/simpleD3D11Texture/README.md +++ b/Samples/5_Domain_Specific/simpleD3D11Texture/README.md @@ -10,7 +10,7 @@ Graphics Interop, Image Processing ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 
](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemset, cudaFree, cudaGraphicsMapResources, cudaGraphicsUnregisterResource, cudaGetDeviceCount, cudaMallocPitch, cudaMalloc, cudaGraphicsSubResourceGetMappedArray, cudaGetLastError, cudaGraphicsUnmapResources, cudaGetErrorString, cudaGetDeviceProperties +cudaGraphicsUnmapResources, cudaMalloc, cudaMallocPitch, cudaGetErrorString, cudaFree, cudaGetLastError, cudaGraphicsMapResources, cudaGetDeviceCount, cudaMemset, cudaGraphicsUnregisterResource, cudaGraphicsSubResourceGetMappedArray, cudaGetDeviceProperties ## Dependencies needed to build/run [DirectX](../../../README.md#directx) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/5_Domain_Specific/simpleD3D11Texture/simpleD3D11Texture_vs2017.vcxproj b/Samples/5_Domain_Specific/simpleD3D11Texture/simpleD3D11Texture_vs2017.vcxproj index 81f6d156..025c68ff 100644 --- a/Samples/5_Domain_Specific/simpleD3D11Texture/simpleD3D11Texture_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/simpleD3D11Texture/simpleD3D11Texture_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleD3D11Texture.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -112,6 +112,6 @@ - + diff --git a/Samples/5_Domain_Specific/simpleD3D11Texture/simpleD3D11Texture_vs2019.vcxproj b/Samples/5_Domain_Specific/simpleD3D11Texture/simpleD3D11Texture_vs2019.vcxproj index e2b4e089..67799b21 100644 --- a/Samples/5_Domain_Specific/simpleD3D11Texture/simpleD3D11Texture_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/simpleD3D11Texture/simpleD3D11Texture_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleD3D11Texture.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -108,6 +108,6 @@ - + diff --git a/Samples/5_Domain_Specific/simpleD3D11Texture/simpleD3D11Texture_vs2022.vcxproj b/Samples/5_Domain_Specific/simpleD3D11Texture/simpleD3D11Texture_vs2022.vcxproj index 
9b06a4fd..6345c10c 100644 --- a/Samples/5_Domain_Specific/simpleD3D11Texture/simpleD3D11Texture_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/simpleD3D11Texture/simpleD3D11Texture_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleD3D11Texture.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -108,6 +108,6 @@ - + diff --git a/Samples/5_Domain_Specific/simpleD3D12/NsightEclipse.xml b/Samples/5_Domain_Specific/simpleD3D12/NsightEclipse.xml index f1843ba8..0e142a5a 100644 --- a/Samples/5_Domain_Specific/simpleD3D12/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/simpleD3D12/NsightEclipse.xml @@ -3,19 +3,19 @@ simpleD3D12 - cudaFree - cudaSignalExternalSemaphoresAsync - cudaStreamCreate - cudaGetDeviceCount - cudaImportExternalSemaphore - cudaGetDeviceProperties - cudaImportExternalMemory - cudaExternalMemoryGetMappedBuffer - cudaDestroyExternalSemaphore - cudaSetDevice cudaWaitExternalSemaphoresAsync + cudaExternalMemoryGetMappedBuffer + cudaImportExternalSemaphore + cudaFree + cudaSetDevice + cudaSignalExternalSemaphoresAsync + cudaGetDeviceProperties cudaStreamSynchronize cudaDestroyExternalMemory + cudaStreamCreate + cudaImportExternalMemory + cudaGetDeviceCount + cudaDestroyExternalSemaphore whole @@ -59,6 +59,7 @@ sm80 sm86 sm87 + sm90 windows10 diff --git a/Samples/5_Domain_Specific/simpleD3D12/README.md b/Samples/5_Domain_Specific/simpleD3D12/README.md index 5dc8a446..2e472bf0 100644 --- a/Samples/5_Domain_Specific/simpleD3D12/README.md +++ b/Samples/5_Domain_Specific/simpleD3D12/README.md @@ -10,7 +10,7 @@ Graphics Interop, CUDA DX12 Interop, Image Processing ## 
Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaSignalExternalSemaphoresAsync, cudaStreamCreate, cudaGetDeviceCount, cudaImportExternalSemaphore, cudaGetDeviceProperties, cudaImportExternalMemory, cudaExternalMemoryGetMappedBuffer, cudaDestroyExternalSemaphore, cudaSetDevice, cudaWaitExternalSemaphoresAsync, cudaStreamSynchronize, cudaDestroyExternalMemory +cudaWaitExternalSemaphoresAsync, cudaExternalMemoryGetMappedBuffer, cudaImportExternalSemaphore, cudaFree, cudaSetDevice, 
cudaSignalExternalSemaphoresAsync, cudaGetDeviceProperties, cudaStreamSynchronize, cudaDestroyExternalMemory, cudaStreamCreate, cudaImportExternalMemory, cudaGetDeviceCount, cudaDestroyExternalSemaphore ## Dependencies needed to build/run [DirectX12](../../../README.md#directx12) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/5_Domain_Specific/simpleD3D12/simpleD3D12_vs2017.vcxproj b/Samples/5_Domain_Specific/simpleD3D12/simpleD3D12_vs2017.vcxproj index b75c6dae..35882386 100644 --- a/Samples/5_Domain_Specific/simpleD3D12/simpleD3D12_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/simpleD3D12/simpleD3D12_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleD3D12.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -119,6 +119,6 @@ - + diff --git a/Samples/5_Domain_Specific/simpleD3D12/simpleD3D12_vs2019.vcxproj b/Samples/5_Domain_Specific/simpleD3D12/simpleD3D12_vs2019.vcxproj index 44e46df3..32c9763c 100644 --- a/Samples/5_Domain_Specific/simpleD3D12/simpleD3D12_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/simpleD3D12/simpleD3D12_vs2019.vcxproj @@ -39,7 +39,7 @@ - + @@ -68,7 +68,7 @@ $(OutDir)/simpleD3D12.exe - 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -120,6 +120,6 @@ - + diff --git a/Samples/5_Domain_Specific/simpleD3D12/simpleD3D12_vs2022.vcxproj b/Samples/5_Domain_Specific/simpleD3D12/simpleD3D12_vs2022.vcxproj index 5a793f3c..a7462092 100644 --- a/Samples/5_Domain_Specific/simpleD3D12/simpleD3D12_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/simpleD3D12/simpleD3D12_vs2022.vcxproj @@ -39,7 +39,7 @@ - + @@ -68,7 +68,7 @@ $(OutDir)/simpleD3D12.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -120,6 +120,6 @@ - + diff --git a/Samples/5_Domain_Specific/simpleD3D9/README.md b/Samples/5_Domain_Specific/simpleD3D9/README.md index 4a9ea67b..708a6cf3 100644 --- a/Samples/5_Domain_Specific/simpleD3D9/README.md +++ b/Samples/5_Domain_Specific/simpleD3D9/README.md @@ -10,7 +10,7 @@ Graphics Interop ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 
](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaGraphicsMapResources, cudaGraphicsUnregisterResource, cudaGraphicsResourceGetMappedPointer, cudaGetLastError, cudaGraphicsUnmapResources +cudaGraphicsUnmapResources, cudaGraphicsResourceGetMappedPointer, cudaGetLastError, cudaGraphicsMapResources, cudaGraphicsUnregisterResource ## Dependencies needed to build/run [DirectX](../../../README.md#directx) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/5_Domain_Specific/simpleD3D9/simpleD3D9_vs2017.vcxproj b/Samples/5_Domain_Specific/simpleD3D9/simpleD3D9_vs2017.vcxproj index bea08338..513a7b28 100644 --- a/Samples/5_Domain_Specific/simpleD3D9/simpleD3D9_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/simpleD3D9/simpleD3D9_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleD3D9.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -109,6 +109,6 @@ - + diff --git a/Samples/5_Domain_Specific/simpleD3D9/simpleD3D9_vs2019.vcxproj b/Samples/5_Domain_Specific/simpleD3D9/simpleD3D9_vs2019.vcxproj index 9089cbf7..6c61823a 100644 --- a/Samples/5_Domain_Specific/simpleD3D9/simpleD3D9_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/simpleD3D9/simpleD3D9_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleD3D9.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -105,6 +105,6 @@ - + diff --git a/Samples/5_Domain_Specific/simpleD3D9/simpleD3D9_vs2022.vcxproj b/Samples/5_Domain_Specific/simpleD3D9/simpleD3D9_vs2022.vcxproj index ffda5e78..3389eb90 100644 --- a/Samples/5_Domain_Specific/simpleD3D9/simpleD3D9_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/simpleD3D9/simpleD3D9_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ 
-63,7 +63,7 @@ $(OutDir)/simpleD3D9.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -105,6 +105,6 @@ - + diff --git a/Samples/5_Domain_Specific/simpleD3D9Texture/README.md b/Samples/5_Domain_Specific/simpleD3D9Texture/README.md index 96dffa34..51bf5c0b 100644 --- a/Samples/5_Domain_Specific/simpleD3D9Texture/README.md +++ b/Samples/5_Domain_Specific/simpleD3D9Texture/README.md @@ -10,7 +10,7 @@ Graphics Interop, Texture ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 
](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemset, cudaFree, cudaGraphicsMapResources, cudaGraphicsUnregisterResource, cudaMallocPitch, cudaMalloc, cudaGraphicsSubResourceGetMappedArray, cudaGetLastError, cudaGraphicsUnmapResources +cudaGraphicsUnmapResources, cudaMalloc, cudaMallocPitch, cudaFree, cudaGetLastError, cudaGraphicsMapResources, cudaMemset, cudaGraphicsUnregisterResource, cudaGraphicsSubResourceGetMappedArray ## Dependencies needed to build/run [DirectX](../../../README.md#directx) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/5_Domain_Specific/simpleD3D9Texture/simpleD3D9Texture_vs2017.vcxproj b/Samples/5_Domain_Specific/simpleD3D9Texture/simpleD3D9Texture_vs2017.vcxproj index 5e316cdc..ab2a79d1 100644 --- a/Samples/5_Domain_Specific/simpleD3D9Texture/simpleD3D9Texture_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/simpleD3D9Texture/simpleD3D9Texture_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleD3D9Texture.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -111,6 +111,6 @@ - + diff --git a/Samples/5_Domain_Specific/simpleD3D9Texture/simpleD3D9Texture_vs2019.vcxproj b/Samples/5_Domain_Specific/simpleD3D9Texture/simpleD3D9Texture_vs2019.vcxproj index 3280f464..53853db5 100644 --- a/Samples/5_Domain_Specific/simpleD3D9Texture/simpleD3D9Texture_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/simpleD3D9Texture/simpleD3D9Texture_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleD3D9Texture.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/5_Domain_Specific/simpleD3D9Texture/simpleD3D9Texture_vs2022.vcxproj b/Samples/5_Domain_Specific/simpleD3D9Texture/simpleD3D9Texture_vs2022.vcxproj index 0654afd2..de004fdc 100644 --- 
a/Samples/5_Domain_Specific/simpleD3D9Texture/simpleD3D9Texture_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/simpleD3D9Texture/simpleD3D9Texture_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleD3D9Texture.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/5_Domain_Specific/simpleGL/Makefile b/Samples/5_Domain_Specific/simpleGL/Makefile index ad666c8a..47d5ed80 100644 --- a/Samples/5_Domain_Specific/simpleGL/Makefile +++ b/Samples/5_Domain_Specific/simpleGL/Makefile @@ -299,9 +299,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/5_Domain_Specific/simpleGL/NsightEclipse.xml b/Samples/5_Domain_Specific/simpleGL/NsightEclipse.xml index 228af026..0b73f33b 100644 --- a/Samples/5_Domain_Specific/simpleGL/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/simpleGL/NsightEclipse.xml @@ -3,15 +3,15 @@ simpleGL - cudaFree - cudaGraphicsMapResources - cudaGraphicsGLRegisterBuffer - cudaGraphicsResourceGetMappedPointer - cudaDeviceSynchronize - cudaMalloc - cudaGraphicsUnregisterResource cudaGraphicsUnmapResources cudaMemcpy + cudaFree + cudaGraphicsResourceGetMappedPointer + cudaGraphicsMapResources + cudaDeviceSynchronize + cudaGraphicsUnregisterResource + cudaMalloc + cudaGraphicsGLRegisterBuffer whole @@ -70,6 +70,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git 
a/Samples/5_Domain_Specific/simpleGL/README.md b/Samples/5_Domain_Specific/simpleGL/README.md index e1fc492b..5fc03527 100644 --- a/Samples/5_Domain_Specific/simpleGL/README.md +++ b/Samples/5_Domain_Specific/simpleGL/README.md @@ -10,7 +10,7 @@ Graphics Interop, Vertex Buffers, 3D Graphics ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaGraphicsMapResources, cudaGraphicsGLRegisterBuffer, cudaGraphicsResourceGetMappedPointer, cudaDeviceSynchronize, cudaMalloc, 
cudaGraphicsUnregisterResource, cudaGraphicsUnmapResources, cudaMemcpy +cudaGraphicsUnmapResources, cudaMemcpy, cudaFree, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDeviceSynchronize, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer ## Dependencies needed to build/run [X11](../../../README.md#x11), [GL](../../../README.md#gl) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/5_Domain_Specific/simpleGL/findgllib.mk b/Samples/5_Domain_Specific/simpleGL/findgllib.mk index f0a5c551..998fcf0f 100644 --- a/Samples/5_Domain_Specific/simpleGL/findgllib.mk +++ b/Samples/5_Domain_Specific/simpleGL/findgllib.mk @@ -53,11 +53,12 @@ endif ifeq ("$(TARGET_OS)","linux") # $(info) >> findgllib.mk -> LINUX path <<<) # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside - UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) - FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) - RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) - CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) + UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) + FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) + RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) + CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) 
ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux") DFLT_PATH ?= /usr/lib endif endif + ifeq ("$(SUSE)","0") GLPATH ?= /usr/X11R6/lib64 GLLINK ?= -L/usr/X11R6/lib64 DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(FEDORA)","0") + else GLPATH ?= /usr/lib64/nvidia GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif - ifeq ("$(RHEL)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(CENTOS)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - + # find libGL, libGLU GLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so -print 2>/dev/null) GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null) diff --git a/Samples/5_Domain_Specific/simpleGL/simpleGL_vs2017.vcxproj b/Samples/5_Domain_Specific/simpleGL/simpleGL_vs2017.vcxproj index cf0f4bdc..89aab5d9 100644 --- a/Samples/5_Domain_Specific/simpleGL/simpleGL_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/simpleGL/simpleGL_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleGL.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -117,6 +117,6 @@ - + diff --git a/Samples/5_Domain_Specific/simpleGL/simpleGL_vs2019.vcxproj b/Samples/5_Domain_Specific/simpleGL/simpleGL_vs2019.vcxproj index 67bdb231..934fb116 100644 --- a/Samples/5_Domain_Specific/simpleGL/simpleGL_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/simpleGL/simpleGL_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ 
$(OutDir)/simpleGL.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -113,6 +113,6 @@ - + diff --git a/Samples/5_Domain_Specific/simpleGL/simpleGL_vs2022.vcxproj b/Samples/5_Domain_Specific/simpleGL/simpleGL_vs2022.vcxproj index f6a41b0b..e571db13 100644 --- a/Samples/5_Domain_Specific/simpleGL/simpleGL_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/simpleGL/simpleGL_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleGL.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -113,6 +113,6 @@ - + diff --git a/Samples/5_Domain_Specific/simpleGLES/Makefile b/Samples/5_Domain_Specific/simpleGLES/Makefile index 51e32773..6e0e516e 100644 --- a/Samples/5_Domain_Specific/simpleGLES/Makefile +++ b/Samples/5_Domain_Specific/simpleGLES/Makefile @@ -313,9 +313,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/5_Domain_Specific/simpleGLES/NsightEclipse.xml b/Samples/5_Domain_Specific/simpleGLES/NsightEclipse.xml index 93fd9c15..fc5a25be 100644 --- 
a/Samples/5_Domain_Specific/simpleGLES/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/simpleGLES/NsightEclipse.xml @@ -8,15 +8,15 @@ -DUSE_GLES - cudaFree - cudaGraphicsMapResources - cudaGraphicsGLRegisterBuffer - cudaGraphicsResourceGetMappedPointer - cudaDeviceSynchronize - cudaMalloc - cudaGraphicsUnregisterResource cudaGraphicsUnmapResources cudaMemcpy + cudaFree + cudaGraphicsResourceGetMappedPointer + cudaGraphicsMapResources + cudaDeviceSynchronize + cudaGraphicsUnregisterResource + cudaMalloc + cudaGraphicsGLRegisterBuffer whole @@ -66,6 +66,7 @@ sm80 sm86 sm87 + sm90 graphics_interface.c diff --git a/Samples/5_Domain_Specific/simpleGLES/README.md b/Samples/5_Domain_Specific/simpleGLES/README.md index 454218cf..50644d74 100644 --- a/Samples/5_Domain_Specific/simpleGLES/README.md +++ b/Samples/5_Domain_Specific/simpleGLES/README.md @@ -10,7 +10,7 @@ Graphics Interop, Vertex Buffers, 3D Graphics ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 
](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaGraphicsMapResources, cudaGraphicsGLRegisterBuffer, cudaGraphicsResourceGetMappedPointer, cudaDeviceSynchronize, cudaMalloc, cudaGraphicsUnregisterResource, cudaGraphicsUnmapResources, cudaMemcpy +cudaGraphicsUnmapResources, cudaMemcpy, cudaFree, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDeviceSynchronize, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer ## Dependencies needed to build/run [X11](../../../README.md#x11), [GLES](../../../README.md#gles) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/5_Domain_Specific/simpleGLES/findgleslib.mk b/Samples/5_Domain_Specific/simpleGLES/findgleslib.mk index bcb335c1..6da2f078 100644 --- a/Samples/5_Domain_Specific/simpleGLES/findgleslib.mk +++ b/Samples/5_Domain_Specific/simpleGLES/findgleslib.mk @@ -60,6 +60,7 @@ ifeq ("$(TARGET_OS)","linux") RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) 
+ KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -86,27 +87,17 @@ ifeq ("$(TARGET_OS)","linux") DFLT_PATH ?= /usr/lib endif endif - ifeq ("$(SUSE)","0") + + ifeq ("$(SUSE)","0") GLPATH ?= /usr/X11R6/lib64 GLLINK ?= -L/usr/X11R6/lib64 DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(FEDORA)","0") + else GLPATH ?= /usr/lib64/nvidia GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif - ifeq ("$(RHEL)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(CENTOS)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - + # find libGL, libGLU, libXi, EGLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libEGL.so -print 2>/dev/null) GLESLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLESv2.so -print 2>/dev/null) diff --git a/Samples/5_Domain_Specific/simpleGLES_EGLOutput/Makefile b/Samples/5_Domain_Specific/simpleGLES_EGLOutput/Makefile index d3e12b7a..7debcfaa 100644 --- a/Samples/5_Domain_Specific/simpleGLES_EGLOutput/Makefile +++ b/Samples/5_Domain_Specific/simpleGLES_EGLOutput/Makefile @@ -313,9 +313,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/5_Domain_Specific/simpleGLES_EGLOutput/NsightEclipse.xml b/Samples/5_Domain_Specific/simpleGLES_EGLOutput/NsightEclipse.xml index 27cd4982..a12a32e0 100644 --- a/Samples/5_Domain_Specific/simpleGLES_EGLOutput/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/simpleGLES_EGLOutput/NsightEclipse.xml @@ -10,15 +10,15 @@ -I/usr/include/drm - cudaFree - cudaGraphicsMapResources - cudaGraphicsGLRegisterBuffer - 
cudaGraphicsResourceGetMappedPointer - cudaDeviceSynchronize - cudaMalloc - cudaGraphicsUnregisterResource cudaGraphicsUnmapResources cudaMemcpy + cudaFree + cudaGraphicsResourceGetMappedPointer + cudaGraphicsMapResources + cudaDeviceSynchronize + cudaGraphicsUnregisterResource + cudaMalloc + cudaGraphicsGLRegisterBuffer sm80 sm86 sm87 + sm90 graphics_interface_egloutput_via_egl.c diff --git a/Samples/5_Domain_Specific/simpleGLES_EGLOutput/README.md b/Samples/5_Domain_Specific/simpleGLES_EGLOutput/README.md index c468f0bb..f11b2411 100644 --- a/Samples/5_Domain_Specific/simpleGLES_EGLOutput/README.md +++ b/Samples/5_Domain_Specific/simpleGLES_EGLOutput/README.md @@ -15,7 +15,7 @@ Graphics Interop, Vertex Buffers, 3D Graphics ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 
](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -28,14 +28,14 @@ armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaGraphicsMapResources, cudaGraphicsGLRegisterBuffer, cudaGraphicsResourceGetMappedPointer, cudaDeviceSynchronize, cudaMalloc, cudaGraphicsUnregisterResource, cudaGraphicsUnmapResources, cudaMemcpy +cudaGraphicsUnmapResources, cudaMemcpy, cudaFree, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDeviceSynchronize, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer ## Dependencies needed to build/run [EGLOutput](../../../README.md#egloutput), [GLES](../../../README.md#gles) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/5_Domain_Specific/simpleGLES_EGLOutput/findgleslib.mk b/Samples/5_Domain_Specific/simpleGLES_EGLOutput/findgleslib.mk index bcb335c1..6da2f078 100644 --- a/Samples/5_Domain_Specific/simpleGLES_EGLOutput/findgleslib.mk +++ b/Samples/5_Domain_Specific/simpleGLES_EGLOutput/findgleslib.mk @@ -60,6 +60,7 @@ ifeq ("$(TARGET_OS)","linux") RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) 
ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -86,27 +87,17 @@ ifeq ("$(TARGET_OS)","linux") DFLT_PATH ?= /usr/lib endif endif - ifeq ("$(SUSE)","0") + + ifeq ("$(SUSE)","0") GLPATH ?= /usr/X11R6/lib64 GLLINK ?= -L/usr/X11R6/lib64 DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(FEDORA)","0") + else GLPATH ?= /usr/lib64/nvidia GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif - ifeq ("$(RHEL)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(CENTOS)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - + # find libGL, libGLU, libXi, EGLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libEGL.so -print 2>/dev/null) GLESLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLESv2.so -print 2>/dev/null) diff --git a/Samples/5_Domain_Specific/simpleGLES_screen/Makefile b/Samples/5_Domain_Specific/simpleGLES_screen/Makefile index 971ce716..2629cc49 100644 --- a/Samples/5_Domain_Specific/simpleGLES_screen/Makefile +++ b/Samples/5_Domain_Specific/simpleGLES_screen/Makefile @@ -320,9 +320,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/5_Domain_Specific/simpleGLES_screen/NsightEclipse.xml b/Samples/5_Domain_Specific/simpleGLES_screen/NsightEclipse.xml index 342e0ce5..0b911915 100644 --- a/Samples/5_Domain_Specific/simpleGLES_screen/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/simpleGLES_screen/NsightEclipse.xml @@ -9,15 +9,15 @@ -DWIN_INTERFACE_CUSTOM - cudaFree - cudaGraphicsMapResources - cudaGraphicsGLRegisterBuffer - cudaGraphicsResourceGetMappedPointer - cudaDeviceSynchronize - cudaMalloc - cudaGraphicsUnregisterResource 
cudaGraphicsUnmapResources cudaMemcpy + cudaFree + cudaGraphicsResourceGetMappedPointer + cudaGraphicsMapResources + cudaDeviceSynchronize + cudaGraphicsUnregisterResource + cudaMalloc + cudaGraphicsGLRegisterBuffer whole @@ -67,6 +67,7 @@ sm80 sm86 sm87 + sm90 graphics_interface.c diff --git a/Samples/5_Domain_Specific/simpleGLES_screen/README.md b/Samples/5_Domain_Specific/simpleGLES_screen/README.md index cf4804fd..78f96be4 100644 --- a/Samples/5_Domain_Specific/simpleGLES_screen/README.md +++ b/Samples/5_Domain_Specific/simpleGLES_screen/README.md @@ -10,7 +10,7 @@ Graphics Interop, Vertex Buffers, 3D Graphics ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 
](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ aarch64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaGraphicsMapResources, cudaGraphicsGLRegisterBuffer, cudaGraphicsResourceGetMappedPointer, cudaDeviceSynchronize, cudaMalloc, cudaGraphicsUnregisterResource, cudaGraphicsUnmapResources, cudaMemcpy +cudaGraphicsUnmapResources, cudaMemcpy, cudaFree, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDeviceSynchronize, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer ## Dependencies needed to build/run [screen](../../../README.md#screen), [GLES](../../../README.md#gles) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/5_Domain_Specific/simpleGLES_screen/findgleslib.mk b/Samples/5_Domain_Specific/simpleGLES_screen/findgleslib.mk index bcb335c1..6da2f078 100644 --- a/Samples/5_Domain_Specific/simpleGLES_screen/findgleslib.mk +++ b/Samples/5_Domain_Specific/simpleGLES_screen/findgleslib.mk @@ -60,6 +60,7 @@ ifeq ("$(TARGET_OS)","linux") RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) 
ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -86,27 +87,17 @@ ifeq ("$(TARGET_OS)","linux") DFLT_PATH ?= /usr/lib endif endif - ifeq ("$(SUSE)","0") + + ifeq ("$(SUSE)","0") GLPATH ?= /usr/X11R6/lib64 GLLINK ?= -L/usr/X11R6/lib64 DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(FEDORA)","0") + else GLPATH ?= /usr/lib64/nvidia GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif - ifeq ("$(RHEL)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(CENTOS)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - + # find libGL, libGLU, libXi, EGLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libEGL.so -print 2>/dev/null) GLESLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLESv2.so -print 2>/dev/null) diff --git a/Samples/5_Domain_Specific/simpleVulkan/Makefile b/Samples/5_Domain_Specific/simpleVulkan/Makefile index 83f57ad6..8b5cfd46 100644 --- a/Samples/5_Domain_Specific/simpleVulkan/Makefile +++ b/Samples/5_Domain_Specific/simpleVulkan/Makefile @@ -338,9 +338,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/5_Domain_Specific/simpleVulkan/NsightEclipse.xml b/Samples/5_Domain_Specific/simpleVulkan/NsightEclipse.xml index a0ec1c56..1d9449e7 100644 --- a/Samples/5_Domain_Specific/simpleVulkan/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/simpleVulkan/NsightEclipse.xml @@ -6,24 +6,24 @@ --std=c++11 - cudaTimelineSemaphore - cudaSignalExternalSemaphoresAsync - cudaOccupancyMaxActiveBlocksPerMultiprocessor - cudaGetDeviceCount - cudaImportExternalSemaphore - cudaVertMem - cudaImportExternalMemory - cudaDestroyExternalMemory + 
cudaStreamCreateWithFlags cudaExternalMemoryGetMappedBuffer cudaSignalSemaphore - cudaStreamCreateWithFlags + cudaWaitExternalSemaphoresAsync + cudaVertMem + cudaImportExternalSemaphore cudaWaitSemaphore cudaHeightMap - cudaDestroyExternalSemaphore cudaSetDevice - cudaWaitExternalSemaphoresAsync + cudaGetDeviceCount + cudaSignalExternalSemaphoresAsync + cudaTimelineSemaphore cudaStreamSynchronize + cudaDestroyExternalMemory + cudaOccupancyMaxActiveBlocksPerMultiprocessor + cudaImportExternalMemory cudaGetDeviceProperties + cudaDestroyExternalSemaphore whole @@ -75,6 +75,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/5_Domain_Specific/simpleVulkan/README.md b/Samples/5_Domain_Specific/simpleVulkan/README.md index 27e2dd04..fd287027 100644 --- a/Samples/5_Domain_Specific/simpleVulkan/README.md +++ b/Samples/5_Domain_Specific/simpleVulkan/README.md @@ -10,7 +10,7 @@ Graphics Interop, CUDA Vulkan Interop, Data Parallel Algorithms ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 
](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, aarch64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaTimelineSemaphore, cudaSignalExternalSemaphoresAsync, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaGetDeviceCount, cudaImportExternalSemaphore, cudaVertMem, cudaImportExternalMemory, cudaDestroyExternalMemory, cudaExternalMemoryGetMappedBuffer, cudaSignalSemaphore, cudaStreamCreateWithFlags, cudaWaitSemaphore, cudaHeightMap, cudaDestroyExternalSemaphore, cudaSetDevice, cudaWaitExternalSemaphoresAsync, cudaStreamSynchronize, cudaGetDeviceProperties +cudaStreamCreateWithFlags, cudaExternalMemoryGetMappedBuffer, cudaSignalSemaphore, cudaWaitExternalSemaphoresAsync, cudaVertMem, cudaImportExternalSemaphore, cudaWaitSemaphore, cudaHeightMap, cudaSetDevice, cudaGetDeviceCount, cudaSignalExternalSemaphoresAsync, cudaTimelineSemaphore, cudaStreamSynchronize, cudaDestroyExternalMemory, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaImportExternalMemory, cudaGetDeviceProperties, cudaDestroyExternalSemaphore ## Dependencies needed to build/run [X11](../../../README.md#x11), [VULKAN](../../../README.md#vulkan) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/5_Domain_Specific/simpleVulkan/findvulkan.mk b/Samples/5_Domain_Specific/simpleVulkan/findvulkan.mk index 4d4e8aed..004ab22b 100644 --- a/Samples/5_Domain_Specific/simpleVulkan/findvulkan.mk +++ b/Samples/5_Domain_Specific/simpleVulkan/findvulkan.mk @@ -57,6 +57,7 @@ ifeq ("$(TARGET_OS)","linux") RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -103,6 +104,11 @@ ifeq ("$(TARGET_OS)","linux") GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif + ifeq ("$(KYLIN)","0") + GLPATH ?= /usr/lib64/nvidia + GLLINK ?= -L/usr/lib64/nvidia + DFLT_PATH ?= /usr/lib64 + endif VULKAN_SDK_PATH ?= ${VULKAN_SDK} diff --git a/Samples/5_Domain_Specific/simpleVulkan/simpleVulkan_vs2017.vcxproj b/Samples/5_Domain_Specific/simpleVulkan/simpleVulkan_vs2017.vcxproj index 5e7353a0..1ec49d05 100644 --- a/Samples/5_Domain_Specific/simpleVulkan/simpleVulkan_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/simpleVulkan/simpleVulkan_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleVulkan.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -121,6 +121,6 @@ - + diff --git a/Samples/5_Domain_Specific/simpleVulkan/simpleVulkan_vs2019.vcxproj 
b/Samples/5_Domain_Specific/simpleVulkan/simpleVulkan_vs2019.vcxproj index 55b1a4b5..4a4e5a63 100644 --- a/Samples/5_Domain_Specific/simpleVulkan/simpleVulkan_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/simpleVulkan/simpleVulkan_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleVulkan.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -117,6 +117,6 @@ - + diff --git a/Samples/5_Domain_Specific/simpleVulkan/simpleVulkan_vs2022.vcxproj b/Samples/5_Domain_Specific/simpleVulkan/simpleVulkan_vs2022.vcxproj index 9f655efa..81252a96 100644 --- a/Samples/5_Domain_Specific/simpleVulkan/simpleVulkan_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/simpleVulkan/simpleVulkan_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleVulkan.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -117,6 +117,6 @@ - + diff --git a/Samples/5_Domain_Specific/simpleVulkanMMAP/Makefile b/Samples/5_Domain_Specific/simpleVulkanMMAP/Makefile index 5a162a2b..42e9802f 100644 --- a/Samples/5_Domain_Specific/simpleVulkanMMAP/Makefile +++ b/Samples/5_Domain_Specific/simpleVulkanMMAP/Makefile @@ -340,9 +340,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 
75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/5_Domain_Specific/simpleVulkanMMAP/NsightEclipse.xml b/Samples/5_Domain_Specific/simpleVulkanMMAP/NsightEclipse.xml index fed119ce..5f91fb68 100644 --- a/Samples/5_Domain_Specific/simpleVulkanMMAP/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/simpleVulkanMMAP/NsightEclipse.xml @@ -6,36 +6,36 @@ --std=c++11 + cuMemCreate + cuMemAddressReserve + cuMemGetAllocationGranularity + cuMemAddressFree + cuMemUnmap + cuMemMap cuMemRelease cuMemExportToShareableHandle cuMemSetAccess - cuMemMap - cuMemCreate - cuMemAddressFree - cuMemGetAllocationGranularity - cuMemUnmap - cuMemAddressReserve - cudaOccupancyMaxActiveBlocksPerMultiprocessor + cudaWaitExternalSemaphoresAsync cudaImportExternalSemaphore - cudaStreamCreateWithFlags - cudaMemcpyAsync - cudaStreamDestroy - cudaSignalExternalSemaphoresAsync - cudaMallocHost - cudaMalloc - cudaSetDevice - cudaGetDeviceProperties - cudaGetDeviceCount cudaDeviceGetAttribute + cudaSetDevice + cudaLaunchHostFunc + cudaMallocHost + cudaSignalExternalSemaphoresAsync + cudaFreeHost + cudaMemsetAsync + cudaMemcpyAsync + cudaGetDeviceCount + cudaStreamCreateWithFlags + cudaStreamDestroy + cudaDestroyExternalSemaphore cudaSignalSemaphore cudaWaitSemaphore - cudaDestroyExternalSemaphore - cudaStreamSynchronize cudaFree - cudaLaunchHostFunc - cudaMemsetAsync - cudaFreeHost - cudaWaitExternalSemaphoresAsync + cudaStreamSynchronize + cudaMalloc + cudaOccupancyMaxActiveBlocksPerMultiprocessor + cudaGetDeviceProperties whole @@ -91,6 +91,7 @@ sm80 sm86 sm87 + sm90 ../../../Common/helper_multiprocess.cpp ../../../Common/helper_multiprocess.h diff --git a/Samples/5_Domain_Specific/simpleVulkanMMAP/README.md b/Samples/5_Domain_Specific/simpleVulkanMMAP/README.md index eea7f28e..3030b57b 100644 --- a/Samples/5_Domain_Specific/simpleVulkanMMAP/README.md +++ 
b/Samples/5_Domain_Specific/simpleVulkanMMAP/README.md @@ -10,7 +10,7 @@ cuMemMap IPC, MMAP, Graphics Interop, CUDA Vulkan Interop, Data Parallel Algorit ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,17 +23,17 @@ x86_64, aarch64 ## CUDA APIs involved ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html) -cuMemRelease, cuMemExportToShareableHandle, cuMemSetAccess, cuMemMap, cuMemCreate, cuMemAddressFree, cuMemGetAllocationGranularity, cuMemUnmap, cuMemAddressReserve +cuMemCreate, cuMemAddressReserve, cuMemGetAllocationGranularity, cuMemAddressFree, cuMemUnmap, cuMemMap, cuMemRelease, 
cuMemExportToShareableHandle, cuMemSetAccess ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaImportExternalSemaphore, cudaStreamCreateWithFlags, cudaMemcpyAsync, cudaStreamDestroy, cudaSignalExternalSemaphoresAsync, cudaMallocHost, cudaMalloc, cudaSetDevice, cudaGetDeviceProperties, cudaGetDeviceCount, cudaDeviceGetAttribute, cudaSignalSemaphore, cudaWaitSemaphore, cudaDestroyExternalSemaphore, cudaStreamSynchronize, cudaFree, cudaLaunchHostFunc, cudaMemsetAsync, cudaFreeHost, cudaWaitExternalSemaphoresAsync +cudaWaitExternalSemaphoresAsync, cudaImportExternalSemaphore, cudaDeviceGetAttribute, cudaSetDevice, cudaLaunchHostFunc, cudaMallocHost, cudaSignalExternalSemaphoresAsync, cudaFreeHost, cudaMemsetAsync, cudaMemcpyAsync, cudaGetDeviceCount, cudaStreamCreateWithFlags, cudaStreamDestroy, cudaDestroyExternalSemaphore, cudaSignalSemaphore, cudaWaitSemaphore, cudaFree, cudaStreamSynchronize, cudaMalloc, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaGetDeviceProperties ## Dependencies needed to build/run [X11](../../../README.md#x11), [VULKAN](../../../README.md#vulkan) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/5_Domain_Specific/simpleVulkanMMAP/findvulkan.mk b/Samples/5_Domain_Specific/simpleVulkanMMAP/findvulkan.mk index 4d4e8aed..004ab22b 100644 --- a/Samples/5_Domain_Specific/simpleVulkanMMAP/findvulkan.mk +++ b/Samples/5_Domain_Specific/simpleVulkanMMAP/findvulkan.mk @@ -57,6 +57,7 @@ ifeq ("$(TARGET_OS)","linux") RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) 
CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -103,6 +104,11 @@ ifeq ("$(TARGET_OS)","linux") GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif + ifeq ("$(KYLIN)","0") + GLPATH ?= /usr/lib64/nvidia + GLLINK ?= -L/usr/lib64/nvidia + DFLT_PATH ?= /usr/lib64 + endif VULKAN_SDK_PATH ?= ${VULKAN_SDK} diff --git a/Samples/5_Domain_Specific/simpleVulkanMMAP/simpleVulkanMMAP_vs2017.vcxproj b/Samples/5_Domain_Specific/simpleVulkanMMAP/simpleVulkanMMAP_vs2017.vcxproj index ea52d956..2c6ebec4 100644 --- a/Samples/5_Domain_Specific/simpleVulkanMMAP/simpleVulkanMMAP_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/simpleVulkanMMAP/simpleVulkanMMAP_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/simpleVulkanMMAP.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -123,6 +123,6 @@ - + diff --git a/Samples/5_Domain_Specific/simpleVulkanMMAP/simpleVulkanMMAP_vs2019.vcxproj b/Samples/5_Domain_Specific/simpleVulkanMMAP/simpleVulkanMMAP_vs2019.vcxproj index b41768d6..1343dd04 100644 --- a/Samples/5_Domain_Specific/simpleVulkanMMAP/simpleVulkanMMAP_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/simpleVulkanMMAP/simpleVulkanMMAP_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleVulkanMMAP.exe - 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -119,6 +119,6 @@ - + diff --git a/Samples/5_Domain_Specific/simpleVulkanMMAP/simpleVulkanMMAP_vs2022.vcxproj b/Samples/5_Domain_Specific/simpleVulkanMMAP/simpleVulkanMMAP_vs2022.vcxproj index a13ab719..cf29fc5c 100644 --- a/Samples/5_Domain_Specific/simpleVulkanMMAP/simpleVulkanMMAP_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/simpleVulkanMMAP/simpleVulkanMMAP_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/simpleVulkanMMAP.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -119,6 +119,6 @@ - + diff --git a/Samples/5_Domain_Specific/smokeParticles/Makefile b/Samples/5_Domain_Specific/smokeParticles/Makefile index b6f4f0ff..f6afadbe 100644 --- a/Samples/5_Domain_Specific/smokeParticles/Makefile +++ b/Samples/5_Domain_Specific/smokeParticles/Makefile @@ -324,9 +324,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/5_Domain_Specific/smokeParticles/NsightEclipse.xml b/Samples/5_Domain_Specific/smokeParticles/NsightEclipse.xml 
index ca2f9c07..581dba9a 100644 --- a/Samples/5_Domain_Specific/smokeParticles/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/smokeParticles/NsightEclipse.xml @@ -6,10 +6,10 @@ --std=c++14 - cudaCreateTextureObject cudaExtent - cudaMemcpyToSymbol cudaPitchedPtr + cudaCreateTextureObject + cudaMemcpyToSymbol whole @@ -79,6 +79,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/5_Domain_Specific/smokeParticles/README.md b/Samples/5_Domain_Specific/smokeParticles/README.md index e5082eba..3750d773 100644 --- a/Samples/5_Domain_Specific/smokeParticles/README.md +++ b/Samples/5_Domain_Specific/smokeParticles/README.md @@ -10,7 +10,7 @@ Graphics Interop, Data Parallel Algorithms, Physically-Based Simulation ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) 
[SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaCreateTextureObject, cudaExtent, cudaMemcpyToSymbol, cudaPitchedPtr +cudaExtent, cudaPitchedPtr, cudaCreateTextureObject, cudaMemcpyToSymbol ## Dependencies needed to build/run [X11](../../../README.md#x11), [GL](../../../README.md#gl) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/5_Domain_Specific/smokeParticles/findgllib.mk b/Samples/5_Domain_Specific/smokeParticles/findgllib.mk index f0a5c551..998fcf0f 100644 --- a/Samples/5_Domain_Specific/smokeParticles/findgllib.mk +++ b/Samples/5_Domain_Specific/smokeParticles/findgllib.mk @@ -53,11 +53,12 @@ endif ifeq ("$(TARGET_OS)","linux") # $(info) >> findgllib.mk -> LINUX path <<<) # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside - UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) - FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) - RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) - CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) + UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) + FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) + RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) + CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) 
SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux") DFLT_PATH ?= /usr/lib endif endif + ifeq ("$(SUSE)","0") GLPATH ?= /usr/X11R6/lib64 GLLINK ?= -L/usr/X11R6/lib64 DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(FEDORA)","0") + else GLPATH ?= /usr/lib64/nvidia GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif - ifeq ("$(RHEL)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(CENTOS)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - + # find libGL, libGLU GLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so -print 2>/dev/null) GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null) diff --git a/Samples/5_Domain_Specific/smokeParticles/smokeParticles_vs2017.vcxproj b/Samples/5_Domain_Specific/smokeParticles/smokeParticles_vs2017.vcxproj index 07d6838a..1807407d 100644 --- a/Samples/5_Domain_Specific/smokeParticles/smokeParticles_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/smokeParticles/smokeParticles_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/smokeParticles.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -137,6 +137,6 @@ - + diff --git a/Samples/5_Domain_Specific/smokeParticles/smokeParticles_vs2019.vcxproj 
b/Samples/5_Domain_Specific/smokeParticles/smokeParticles_vs2019.vcxproj index e69a7c0b..661e6410 100644 --- a/Samples/5_Domain_Specific/smokeParticles/smokeParticles_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/smokeParticles/smokeParticles_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/smokeParticles.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -133,6 +133,6 @@ - + diff --git a/Samples/5_Domain_Specific/smokeParticles/smokeParticles_vs2022.vcxproj b/Samples/5_Domain_Specific/smokeParticles/smokeParticles_vs2022.vcxproj index 0a64cc05..d6bb21aa 100644 --- a/Samples/5_Domain_Specific/smokeParticles/smokeParticles_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/smokeParticles/smokeParticles_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/smokeParticles.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -133,6 +133,6 @@ - + diff --git a/Samples/5_Domain_Specific/stereoDisparity/Makefile b/Samples/5_Domain_Specific/stereoDisparity/Makefile index 4b2ae8d0..7608b56b 100644 --- a/Samples/5_Domain_Specific/stereoDisparity/Makefile +++ b/Samples/5_Domain_Specific/stereoDisparity/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l 
aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/5_Domain_Specific/stereoDisparity/NsightEclipse.xml b/Samples/5_Domain_Specific/stereoDisparity/NsightEclipse.xml index e4c72e7f..a0249c74 100644 --- a/Samples/5_Domain_Specific/stereoDisparity/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/stereoDisparity/NsightEclipse.xml @@ -6,16 +6,16 @@ *.pgm - cudaFree - cudaEventRecord - cudaEventCreate - cudaEventElapsedTime - cudaDeviceSynchronize - cudaEventSynchronize - cudaCreateTextureObject - cudaMalloc cudaMemcpy + cudaFree + cudaEventSynchronize + cudaDeviceSynchronize + cudaCreateTextureObject + cudaEventRecord + cudaMalloc + cudaEventElapsedTime cudaGetDeviceProperties + cudaEventCreate whole @@ -61,6 +61,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/5_Domain_Specific/stereoDisparity/README.md b/Samples/5_Domain_Specific/stereoDisparity/README.md index 1034eb62..81b9eca4 100644 --- a/Samples/5_Domain_Specific/stereoDisparity/README.md +++ b/Samples/5_Domain_Specific/stereoDisparity/README.md @@ -10,7 +10,7 @@ Image Processing, Video Intrinsics ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 
](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaEventRecord, cudaEventCreate, cudaEventElapsedTime, cudaDeviceSynchronize, cudaEventSynchronize, cudaCreateTextureObject, cudaMalloc, cudaMemcpy, cudaGetDeviceProperties +cudaMemcpy, cudaFree, cudaEventSynchronize, cudaDeviceSynchronize, cudaCreateTextureObject, cudaEventRecord, cudaMalloc, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/5_Domain_Specific/stereoDisparity/stereoDisparity_vs2017.vcxproj b/Samples/5_Domain_Specific/stereoDisparity/stereoDisparity_vs2017.vcxproj index eb5a3d5b..9305e49d 100644 --- a/Samples/5_Domain_Specific/stereoDisparity/stereoDisparity_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/stereoDisparity/stereoDisparity_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/stereoDisparity.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/5_Domain_Specific/stereoDisparity/stereoDisparity_vs2019.vcxproj b/Samples/5_Domain_Specific/stereoDisparity/stereoDisparity_vs2019.vcxproj index 1147bc9d..bea60c05 100644 --- a/Samples/5_Domain_Specific/stereoDisparity/stereoDisparity_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/stereoDisparity/stereoDisparity_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/stereoDisparity.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/5_Domain_Specific/stereoDisparity/stereoDisparity_vs2022.vcxproj b/Samples/5_Domain_Specific/stereoDisparity/stereoDisparity_vs2022.vcxproj index 16e56415..0b22b48a 100644 --- 
a/Samples/5_Domain_Specific/stereoDisparity/stereoDisparity_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/stereoDisparity/stereoDisparity_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/stereoDisparity.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/5_Domain_Specific/volumeFiltering/Makefile b/Samples/5_Domain_Specific/volumeFiltering/Makefile index dfcda5bb..438c552e 100644 --- a/Samples/5_Domain_Specific/volumeFiltering/Makefile +++ b/Samples/5_Domain_Specific/volumeFiltering/Makefile @@ -299,9 +299,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/5_Domain_Specific/volumeFiltering/NsightEclipse.xml b/Samples/5_Domain_Specific/volumeFiltering/NsightEclipse.xml index 813926c5..7b54f46a 100644 --- a/Samples/5_Domain_Specific/volumeFiltering/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/volumeFiltering/NsightEclipse.xml @@ -3,25 +3,25 @@ volumeFiltering - cudaGraphicsMapResources - cudaGraphicsResourceGetMappedPointer cudaMemcpy - cudaMemcpyToSymbol + cudaGraphicsMapResources cudaDestroySurfaceObject - cudaPitchedPtr - cudaMalloc - cudaGraphicsUnregisterResource + cudaExtent cudaDeviceSynchronize + cudaCreateSurfaceObject + cudaMemcpyToSymbol + cudaPitchedPtr + cudaGraphicsResourceGetMappedPointer + cudaCreateTextureObject + cudaGraphicsUnmapResources + cudaMallocArray + cudaFreeArray 
cudaDestroyTextureObject cudaMemset - cudaFree - cudaFreeArray cudaGraphicsGLRegisterBuffer - cudaExtent - cudaCreateSurfaceObject - cudaCreateTextureObject - cudaMallocArray - cudaGraphicsUnmapResources + cudaFree + cudaGraphicsUnregisterResource + cudaMalloc whole @@ -86,6 +86,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/5_Domain_Specific/volumeFiltering/README.md b/Samples/5_Domain_Specific/volumeFiltering/README.md index 2476ecf5..a2bc76e3 100644 --- a/Samples/5_Domain_Specific/volumeFiltering/README.md +++ b/Samples/5_Domain_Specific/volumeFiltering/README.md @@ -10,7 +10,7 @@ Graphics Interop, Image Processing, 3D Textures, Surface Writes ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 
9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaGraphicsMapResources, cudaGraphicsResourceGetMappedPointer, cudaMemcpy, cudaMemcpyToSymbol, cudaDestroySurfaceObject, cudaPitchedPtr, cudaMalloc, cudaGraphicsUnregisterResource, cudaDeviceSynchronize, cudaDestroyTextureObject, cudaMemset, cudaFree, cudaFreeArray, cudaGraphicsGLRegisterBuffer, cudaExtent, cudaCreateSurfaceObject, cudaCreateTextureObject, cudaMallocArray, cudaGraphicsUnmapResources +cudaMemcpy, cudaGraphicsMapResources, cudaDestroySurfaceObject, cudaExtent, cudaDeviceSynchronize, cudaCreateSurfaceObject, cudaMemcpyToSymbol, cudaPitchedPtr, cudaGraphicsResourceGetMappedPointer, cudaCreateTextureObject, cudaGraphicsUnmapResources, cudaMallocArray, cudaFreeArray, cudaDestroyTextureObject, cudaMemset, cudaGraphicsGLRegisterBuffer, cudaFree, cudaGraphicsUnregisterResource, cudaMalloc ## Dependencies needed to build/run [X11](../../../README.md#x11), [GL](../../../README.md#gl) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/5_Domain_Specific/volumeFiltering/findgllib.mk b/Samples/5_Domain_Specific/volumeFiltering/findgllib.mk index f0a5c551..998fcf0f 100644 --- a/Samples/5_Domain_Specific/volumeFiltering/findgllib.mk +++ b/Samples/5_Domain_Specific/volumeFiltering/findgllib.mk @@ -53,11 +53,12 @@ endif ifeq ("$(TARGET_OS)","linux") # $(info) >> findgllib.mk -> LINUX path <<<) # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside - UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) - FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) - RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) - CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) + UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) + FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) + RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) + CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) 
ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux") DFLT_PATH ?= /usr/lib endif endif + ifeq ("$(SUSE)","0") GLPATH ?= /usr/X11R6/lib64 GLLINK ?= -L/usr/X11R6/lib64 DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(FEDORA)","0") + else GLPATH ?= /usr/lib64/nvidia GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif - ifeq ("$(RHEL)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(CENTOS)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - + # find libGL, libGLU GLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so -print 2>/dev/null) GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null) diff --git a/Samples/5_Domain_Specific/volumeFiltering/volumeFiltering_vs2017.vcxproj b/Samples/5_Domain_Specific/volumeFiltering/volumeFiltering_vs2017.vcxproj index 71b27a63..6e068663 100644 --- a/Samples/5_Domain_Specific/volumeFiltering/volumeFiltering_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/volumeFiltering/volumeFiltering_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/volumeFiltering.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -122,6 +122,6 @@ - + diff --git a/Samples/5_Domain_Specific/volumeFiltering/volumeFiltering_vs2019.vcxproj b/Samples/5_Domain_Specific/volumeFiltering/volumeFiltering_vs2019.vcxproj index af2faa78..2a01aa1c 100644 --- a/Samples/5_Domain_Specific/volumeFiltering/volumeFiltering_vs2019.vcxproj 
+++ b/Samples/5_Domain_Specific/volumeFiltering/volumeFiltering_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/volumeFiltering.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -118,6 +118,6 @@ - + diff --git a/Samples/5_Domain_Specific/volumeFiltering/volumeFiltering_vs2022.vcxproj b/Samples/5_Domain_Specific/volumeFiltering/volumeFiltering_vs2022.vcxproj index 3dc02a43..04608aae 100644 --- a/Samples/5_Domain_Specific/volumeFiltering/volumeFiltering_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/volumeFiltering/volumeFiltering_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/volumeFiltering.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -118,6 +118,6 @@ - + diff --git a/Samples/5_Domain_Specific/volumeRender/Makefile b/Samples/5_Domain_Specific/volumeRender/Makefile index b2a74d0d..fdb5649f 100644 --- a/Samples/5_Domain_Specific/volumeRender/Makefile +++ b/Samples/5_Domain_Specific/volumeRender/Makefile @@ -299,9 +299,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) 
diff --git a/Samples/5_Domain_Specific/volumeRender/NsightEclipse.xml b/Samples/5_Domain_Specific/volumeRender/NsightEclipse.xml index bfef15a2..05b789d9 100644 --- a/Samples/5_Domain_Specific/volumeRender/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/volumeRender/NsightEclipse.xml @@ -3,24 +3,24 @@ volumeRender - cudaMemcpyToSymbol - cudaMemset - cudaFree - cudaGraphicsMapResources - cudaFreeArray - cudaGraphicsGLRegisterBuffer - cudaGraphicsResourceGetMappedPointer - cudaExtent - cudaDeviceSynchronize - cudaDestroyTextureObject - cudaPitchedPtr - cudaCreateTextureObject - cudaMalloc cudaProfilerStop - cudaMallocArray - cudaGraphicsUnregisterResource cudaGraphicsUnmapResources cudaMemcpy + cudaMallocArray + cudaFreeArray + cudaFree + cudaPitchedPtr + cudaGraphicsResourceGetMappedPointer + cudaGraphicsMapResources + cudaDestroyTextureObject + cudaExtent + cudaDeviceSynchronize + cudaCreateTextureObject + cudaMemset + cudaMemcpyToSymbol + cudaGraphicsUnregisterResource + cudaMalloc + cudaGraphicsGLRegisterBuffer whole @@ -84,6 +84,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/5_Domain_Specific/volumeRender/README.md b/Samples/5_Domain_Specific/volumeRender/README.md index acc1fb2a..d5d6ef58 100644 --- a/Samples/5_Domain_Specific/volumeRender/README.md +++ b/Samples/5_Domain_Specific/volumeRender/README.md @@ -10,7 +10,7 @@ Graphics Interop, Image Processing, 3D Textures ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 
](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemcpyToSymbol, cudaMemset, cudaFree, cudaGraphicsMapResources, cudaFreeArray, cudaGraphicsGLRegisterBuffer, cudaGraphicsResourceGetMappedPointer, cudaExtent, cudaDeviceSynchronize, cudaDestroyTextureObject, cudaPitchedPtr, cudaCreateTextureObject, cudaMalloc, cudaProfilerStop, cudaMallocArray, cudaGraphicsUnregisterResource, cudaGraphicsUnmapResources, cudaMemcpy +cudaProfilerStop, cudaGraphicsUnmapResources, cudaMemcpy, cudaMallocArray, cudaFreeArray, cudaFree, cudaPitchedPtr, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDestroyTextureObject, cudaExtent, cudaDeviceSynchronize, cudaCreateTextureObject, cudaMemset, cudaMemcpyToSymbol, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer ## Dependencies needed to build/run [X11](../../../README.md#x11), [GL](../../../README.md#gl) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/5_Domain_Specific/volumeRender/findgllib.mk b/Samples/5_Domain_Specific/volumeRender/findgllib.mk index f0a5c551..998fcf0f 100644 --- a/Samples/5_Domain_Specific/volumeRender/findgllib.mk +++ b/Samples/5_Domain_Specific/volumeRender/findgllib.mk @@ -53,11 +53,12 @@ endif ifeq ("$(TARGET_OS)","linux") # $(info) >> findgllib.mk -> LINUX path <<<) # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside - UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) - FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) - RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) - CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) + UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) + FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) + RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) + CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) 
ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux") DFLT_PATH ?= /usr/lib endif endif + ifeq ("$(SUSE)","0") GLPATH ?= /usr/X11R6/lib64 GLLINK ?= -L/usr/X11R6/lib64 DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(FEDORA)","0") + else GLPATH ?= /usr/lib64/nvidia GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif - ifeq ("$(RHEL)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - ifeq ("$(CENTOS)","0") - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - + # find libGL, libGLU GLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so -print 2>/dev/null) GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null) diff --git a/Samples/5_Domain_Specific/volumeRender/volumeRender_vs2017.vcxproj b/Samples/5_Domain_Specific/volumeRender/volumeRender_vs2017.vcxproj index 3e6388ca..ab6037e9 100644 --- a/Samples/5_Domain_Specific/volumeRender/volumeRender_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/volumeRender/volumeRender_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/volumeRender.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -118,6 +118,6 @@ - + diff --git a/Samples/5_Domain_Specific/volumeRender/volumeRender_vs2019.vcxproj b/Samples/5_Domain_Specific/volumeRender/volumeRender_vs2019.vcxproj index 44536db8..c752510d 100644 --- a/Samples/5_Domain_Specific/volumeRender/volumeRender_vs2019.vcxproj +++ 
b/Samples/5_Domain_Specific/volumeRender/volumeRender_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/volumeRender.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -114,6 +114,6 @@ - + diff --git a/Samples/5_Domain_Specific/volumeRender/volumeRender_vs2022.vcxproj b/Samples/5_Domain_Specific/volumeRender/volumeRender_vs2022.vcxproj index 30b3f8fe..77698195 100644 --- a/Samples/5_Domain_Specific/volumeRender/volumeRender_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/volumeRender/volumeRender_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/volumeRender.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -114,6 +114,6 @@ - + diff --git a/Samples/5_Domain_Specific/vulkanImageCUDA/Makefile b/Samples/5_Domain_Specific/vulkanImageCUDA/Makefile index 7e592bbc..56b3696a 100644 --- a/Samples/5_Domain_Specific/vulkanImageCUDA/Makefile +++ b/Samples/5_Domain_Specific/vulkanImageCUDA/Makefile @@ -338,9 +338,9 @@ endif # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git 
a/Samples/5_Domain_Specific/vulkanImageCUDA/NsightEclipse.xml b/Samples/5_Domain_Specific/vulkanImageCUDA/NsightEclipse.xml index 33025d3a..4fa867e9 100644 --- a/Samples/5_Domain_Specific/vulkanImageCUDA/NsightEclipse.xml +++ b/Samples/5_Domain_Specific/vulkanImageCUDA/NsightEclipse.xml @@ -6,33 +6,33 @@ --std=c++11 - cudaGetMipmappedArrayLevel - cudaImportExternalSemaphore - cudaExternalMemoryGetMappedMipmappedArray - cudaMemcpy - cudaDestroyExternalMemory - cudaSignalExternalSemaphoresAsync - cudaFreeMipmappedArray cudaVkSemaphoreSignal - cudaVkImportImageMem - cudaDestroySurfaceObject - cudaImportExternalMemory - cudaMalloc - cudaSetDevice - cudaGetDeviceProperties - cudaGetDeviceCount - cudaDestroyTextureObject - cudaUpdateVkImage - cudaDestroyExternalSemaphore - cudaFree - cudaStreamCreate - cudaVkSemaphoreWait - cudaExtent - cudaVkImportSemaphore - cudaCreateSurfaceObject - cudaMallocMipmappedArray - cudaCreateTextureObject cudaWaitExternalSemaphoresAsync + cudaMemcpy + cudaVkImportSemaphore + cudaImportExternalSemaphore + cudaGetMipmappedArrayLevel + cudaSetDevice + cudaDestroySurfaceObject + cudaExtent + cudaMallocMipmappedArray + cudaCreateSurfaceObject + cudaStreamCreate + cudaSignalExternalSemaphoresAsync + cudaUpdateVkImage + cudaCreateTextureObject + cudaGetDeviceCount + cudaExternalMemoryGetMappedMipmappedArray + cudaDestroyTextureObject + cudaVkImportImageMem + cudaDestroyExternalMemory + cudaVkSemaphoreWait + cudaImportExternalMemory + cudaDestroyExternalSemaphore + cudaFreeMipmappedArray + cudaFree + cudaMalloc + cudaGetDeviceProperties whole @@ -80,6 +80,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/5_Domain_Specific/vulkanImageCUDA/README.md b/Samples/5_Domain_Specific/vulkanImageCUDA/README.md index 4b5e262a..97f2de7d 100644 --- a/Samples/5_Domain_Specific/vulkanImageCUDA/README.md +++ b/Samples/5_Domain_Specific/vulkanImageCUDA/README.md @@ -10,7 +10,7 @@ Graphics Interop, CUDA Vulkan Interop, Data Parallel Algorithms ## Supported SM 
Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, aarch64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaGetMipmappedArrayLevel, cudaImportExternalSemaphore, cudaExternalMemoryGetMappedMipmappedArray, cudaMemcpy, cudaDestroyExternalMemory, cudaSignalExternalSemaphoresAsync, cudaFreeMipmappedArray, cudaVkSemaphoreSignal, cudaVkImportImageMem, cudaDestroySurfaceObject, cudaImportExternalMemory, cudaMalloc, cudaSetDevice, cudaGetDeviceProperties, cudaGetDeviceCount, cudaDestroyTextureObject, cudaUpdateVkImage, cudaDestroyExternalSemaphore, cudaFree, cudaStreamCreate, 
cudaVkSemaphoreWait, cudaExtent, cudaVkImportSemaphore, cudaCreateSurfaceObject, cudaMallocMipmappedArray, cudaCreateTextureObject, cudaWaitExternalSemaphoresAsync +cudaVkSemaphoreSignal, cudaWaitExternalSemaphoresAsync, cudaMemcpy, cudaVkImportSemaphore, cudaImportExternalSemaphore, cudaGetMipmappedArrayLevel, cudaSetDevice, cudaDestroySurfaceObject, cudaExtent, cudaMallocMipmappedArray, cudaCreateSurfaceObject, cudaStreamCreate, cudaSignalExternalSemaphoresAsync, cudaUpdateVkImage, cudaCreateTextureObject, cudaGetDeviceCount, cudaExternalMemoryGetMappedMipmappedArray, cudaDestroyTextureObject, cudaVkImportImageMem, cudaDestroyExternalMemory, cudaVkSemaphoreWait, cudaImportExternalMemory, cudaDestroyExternalSemaphore, cudaFreeMipmappedArray, cudaFree, cudaMalloc, cudaGetDeviceProperties ## Dependencies needed to build/run [X11](../../../README.md#x11), [VULKAN](../../../README.md#vulkan) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. ## Build and Run diff --git a/Samples/5_Domain_Specific/vulkanImageCUDA/findvulkan.mk b/Samples/5_Domain_Specific/vulkanImageCUDA/findvulkan.mk index 4d4e8aed..004ab22b 100644 --- a/Samples/5_Domain_Specific/vulkanImageCUDA/findvulkan.mk +++ b/Samples/5_Domain_Specific/vulkanImageCUDA/findvulkan.mk @@ -57,6 +57,7 @@ ifeq ("$(TARGET_OS)","linux") RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) + KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) 
ifeq ("$(UBUNTU)","0") ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) GLPATH := /usr/arm-linux-gnueabihf/lib @@ -103,6 +104,11 @@ ifeq ("$(TARGET_OS)","linux") GLLINK ?= -L/usr/lib64/nvidia DFLT_PATH ?= /usr/lib64 endif + ifeq ("$(KYLIN)","0") + GLPATH ?= /usr/lib64/nvidia + GLLINK ?= -L/usr/lib64/nvidia + DFLT_PATH ?= /usr/lib64 + endif VULKAN_SDK_PATH ?= ${VULKAN_SDK} diff --git a/Samples/5_Domain_Specific/vulkanImageCUDA/vulkanImageCUDA_vs2017.vcxproj b/Samples/5_Domain_Specific/vulkanImageCUDA/vulkanImageCUDA_vs2017.vcxproj index 93db012f..424170ab 100644 --- a/Samples/5_Domain_Specific/vulkanImageCUDA/vulkanImageCUDA_vs2017.vcxproj +++ b/Samples/5_Domain_Specific/vulkanImageCUDA/vulkanImageCUDA_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/vulkanImageCUDA.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -117,6 +117,6 @@ - + diff --git a/Samples/5_Domain_Specific/vulkanImageCUDA/vulkanImageCUDA_vs2019.vcxproj b/Samples/5_Domain_Specific/vulkanImageCUDA/vulkanImageCUDA_vs2019.vcxproj index 632afdc2..769231d5 100644 --- a/Samples/5_Domain_Specific/vulkanImageCUDA/vulkanImageCUDA_vs2019.vcxproj +++ b/Samples/5_Domain_Specific/vulkanImageCUDA/vulkanImageCUDA_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/vulkanImageCUDA.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + 
compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -113,6 +113,6 @@ - + diff --git a/Samples/5_Domain_Specific/vulkanImageCUDA/vulkanImageCUDA_vs2022.vcxproj b/Samples/5_Domain_Specific/vulkanImageCUDA/vulkanImageCUDA_vs2022.vcxproj index 530a2f71..99726397 100644 --- a/Samples/5_Domain_Specific/vulkanImageCUDA/vulkanImageCUDA_vs2022.vcxproj +++ b/Samples/5_Domain_Specific/vulkanImageCUDA/vulkanImageCUDA_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/vulkanImageCUDA.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -113,6 +113,6 @@ - + diff --git a/Samples/6_Performance/UnifiedMemoryPerf/Makefile b/Samples/6_Performance/UnifiedMemoryPerf/Makefile index 05eb45ee..2f1a6188 100644 --- a/Samples/6_Performance/UnifiedMemoryPerf/Makefile +++ b/Samples/6_Performance/UnifiedMemoryPerf/Makefile @@ -285,9 +285,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/6_Performance/UnifiedMemoryPerf/NsightEclipse.xml b/Samples/6_Performance/UnifiedMemoryPerf/NsightEclipse.xml index a3b50f03..6217899d 100644 --- a/Samples/6_Performance/UnifiedMemoryPerf/NsightEclipse.xml +++ b/Samples/6_Performance/UnifiedMemoryPerf/NsightEclipse.xml @@ -3,19 +3,19 @@ 
UnifiedMemoryPerf + cudaMemcpy cudaStreamDestroy + cudaMemPrefetchAsync cudaFree cudaMallocHost cudaMallocManaged - cudaMemPrefetchAsync - cudaStreamCreate cudaStreamAttachMemAsync + cudaHostGetDevicePointer cudaFreeHost + cudaStreamSynchronize cudaMalloc cudaMemcpyAsync - cudaStreamSynchronize - cudaHostGetDevicePointer - cudaMemcpy + cudaStreamCreate cudaGetDeviceProperties @@ -66,6 +66,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/6_Performance/UnifiedMemoryPerf/README.md b/Samples/6_Performance/UnifiedMemoryPerf/README.md index 575b7ee1..6c1ce62c 100644 --- a/Samples/6_Performance/UnifiedMemoryPerf/README.md +++ b/Samples/6_Performance/UnifiedMemoryPerf/README.md @@ -10,7 +10,7 @@ CUDA Systems Integration, Unified Memory, CUDA Streams and Events, Pinned System ## Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 
](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l, aarch64 ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaStreamDestroy, cudaFree, cudaMallocHost, cudaMallocManaged, cudaMemPrefetchAsync, cudaStreamCreate, cudaStreamAttachMemAsync, cudaFreeHost, cudaMalloc, cudaMemcpyAsync, cudaStreamSynchronize, cudaHostGetDevicePointer, cudaMemcpy, cudaGetDeviceProperties +cudaMemcpy, cudaStreamDestroy, cudaMemPrefetchAsync, cudaFree, cudaMallocHost, cudaMallocManaged, cudaStreamAttachMemAsync, cudaHostGetDevicePointer, cudaFreeHost, cudaStreamSynchronize, cudaMalloc, cudaMemcpyAsync, cudaStreamCreate, cudaGetDeviceProperties ## Dependencies needed to build/run [UVM](../../../README.md#uvm) ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Make sure the dependencies mentioned in [Dependencies]() section above are installed. 
## Build and Run diff --git a/Samples/6_Performance/UnifiedMemoryPerf/UnifiedMemoryPerf_vs2017.vcxproj b/Samples/6_Performance/UnifiedMemoryPerf/UnifiedMemoryPerf_vs2017.vcxproj index 5b99766a..7d59c0ae 100644 --- a/Samples/6_Performance/UnifiedMemoryPerf/UnifiedMemoryPerf_vs2017.vcxproj +++ b/Samples/6_Performance/UnifiedMemoryPerf/UnifiedMemoryPerf_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/UnifiedMemoryPerf.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -110,6 +110,6 @@ - + diff --git a/Samples/6_Performance/UnifiedMemoryPerf/UnifiedMemoryPerf_vs2019.vcxproj b/Samples/6_Performance/UnifiedMemoryPerf/UnifiedMemoryPerf_vs2019.vcxproj index 823351a1..9b5e3657 100644 --- a/Samples/6_Performance/UnifiedMemoryPerf/UnifiedMemoryPerf_vs2019.vcxproj +++ b/Samples/6_Performance/UnifiedMemoryPerf/UnifiedMemoryPerf_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/UnifiedMemoryPerf.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -106,6 +106,6 @@ - + diff --git a/Samples/6_Performance/UnifiedMemoryPerf/UnifiedMemoryPerf_vs2022.vcxproj b/Samples/6_Performance/UnifiedMemoryPerf/UnifiedMemoryPerf_vs2022.vcxproj index 7dfac552..536d6d5b 100644 --- 
a/Samples/6_Performance/UnifiedMemoryPerf/UnifiedMemoryPerf_vs2022.vcxproj +++ b/Samples/6_Performance/UnifiedMemoryPerf/UnifiedMemoryPerf_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/UnifiedMemoryPerf.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -106,6 +106,6 @@ - + diff --git a/Samples/6_Performance/alignedTypes/Makefile b/Samples/6_Performance/alignedTypes/Makefile index 907e3f44..492ad197 100644 --- a/Samples/6_Performance/alignedTypes/Makefile +++ b/Samples/6_Performance/alignedTypes/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/6_Performance/alignedTypes/NsightEclipse.xml b/Samples/6_Performance/alignedTypes/NsightEclipse.xml index 388b2897..0b1d16af 100644 --- a/Samples/6_Performance/alignedTypes/NsightEclipse.xml +++ b/Samples/6_Performance/alignedTypes/NsightEclipse.xml @@ -3,11 +3,11 @@ alignedTypes - cudaMemset + cudaMemcpy cudaFree cudaDeviceSynchronize + cudaMemset cudaMalloc - cudaMemcpy cudaGetDeviceProperties @@ -46,6 +46,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/6_Performance/alignedTypes/README.md b/Samples/6_Performance/alignedTypes/README.md index 85c8bd55..56e5e265 100644 --- a/Samples/6_Performance/alignedTypes/README.md +++ b/Samples/6_Performance/alignedTypes/README.md @@ -10,7 +10,7 @@ Performance Strategies ## Supported SM Architectures -[SM 3.5 
](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaMemset, cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaMemcpy, cudaGetDeviceProperties +cudaMemcpy, cudaFree, cudaDeviceSynchronize, cudaMemset, cudaMalloc, cudaGetDeviceProperties ## Prerequisites -Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. 
## Build and Run diff --git a/Samples/6_Performance/alignedTypes/alignedTypes_vs2017.vcxproj b/Samples/6_Performance/alignedTypes/alignedTypes_vs2017.vcxproj index 73e89699..7e3439f6 100644 --- a/Samples/6_Performance/alignedTypes/alignedTypes_vs2017.vcxproj +++ b/Samples/6_Performance/alignedTypes/alignedTypes_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/alignedTypes.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/6_Performance/alignedTypes/alignedTypes_vs2019.vcxproj b/Samples/6_Performance/alignedTypes/alignedTypes_vs2019.vcxproj index da732d28..833b531c 100644 --- a/Samples/6_Performance/alignedTypes/alignedTypes_vs2019.vcxproj +++ b/Samples/6_Performance/alignedTypes/alignedTypes_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/alignedTypes.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/6_Performance/alignedTypes/alignedTypes_vs2022.vcxproj b/Samples/6_Performance/alignedTypes/alignedTypes_vs2022.vcxproj index d24f2749..ea911036 100644 --- a/Samples/6_Performance/alignedTypes/alignedTypes_vs2022.vcxproj +++ b/Samples/6_Performance/alignedTypes/alignedTypes_vs2022.vcxproj @@ -34,7 +34,7 @@ - + 
@@ -63,7 +63,7 @@ $(OutDir)/alignedTypes.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/6_Performance/transpose/Makefile b/Samples/6_Performance/transpose/Makefile index 50355e56..83909bce 100644 --- a/Samples/6_Performance/transpose/Makefile +++ b/Samples/6_Performance/transpose/Makefile @@ -279,9 +279,9 @@ LIBRARIES := # Gencode arguments ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 +SMS ?= 53 61 70 72 75 80 86 87 90 else -SMS ?= 35 37 50 52 60 61 70 75 80 86 +SMS ?= 35 37 50 52 60 61 70 75 80 86 90 endif ifeq ($(SMS),) diff --git a/Samples/6_Performance/transpose/NsightEclipse.xml b/Samples/6_Performance/transpose/NsightEclipse.xml index a080824a..58f448f6 100644 --- a/Samples/6_Performance/transpose/NsightEclipse.xml +++ b/Samples/6_Performance/transpose/NsightEclipse.xml @@ -3,17 +3,17 @@ transpose - cudaFree - cudaEventRecord - cudaEventCreate - cudaEventElapsedTime - cudaEventSynchronize - cudaMalloc - cudaEventDestroy - cudaGetLastError cudaMemcpy - cudaGetDeviceProperties + cudaMalloc + cudaFree + cudaGetLastError + cudaEventSynchronize + cudaEventRecord cudaGetDevice + cudaEventDestroy + cudaEventElapsedTime + cudaGetDeviceProperties + cudaEventCreate whole @@ -55,6 +55,7 @@ sm80 sm86 sm87 + sm90 x86_64 diff --git a/Samples/6_Performance/transpose/README.md b/Samples/6_Performance/transpose/README.md index 2d787068..0693888b 100644 --- a/Samples/6_Performance/transpose/README.md +++ b/Samples/6_Performance/transpose/README.md @@ -10,7 +10,7 @@ Performance Strategies, Linear Algebra ## 
Supported SM Architectures -[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) +[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) ## Supported OSes @@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l ## CUDA APIs involved ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaFree, cudaEventRecord, cudaEventCreate, cudaEventElapsedTime, cudaEventSynchronize, cudaMalloc, cudaEventDestroy, cudaGetLastError, cudaMemcpy, cudaGetDeviceProperties, cudaGetDevice +cudaMemcpy, cudaMalloc, cudaFree, cudaGetLastError, cudaEventSynchronize, cudaEventRecord, cudaGetDevice, cudaEventDestroy, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate ## Prerequisites -Download and install the [CUDA Toolkit 
11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. ## Build and Run diff --git a/Samples/6_Performance/transpose/transpose_vs2017.vcxproj b/Samples/6_Performance/transpose/transpose_vs2017.vcxproj index 0c04b15f..a9f215af 100644 --- a/Samples/6_Performance/transpose/transpose_vs2017.vcxproj +++ b/Samples/6_Performance/transpose/transpose_vs2017.vcxproj @@ -38,7 +38,7 @@ - + @@ -67,7 +67,7 @@ $(OutDir)/transpose.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -107,6 +107,6 @@ - + diff --git a/Samples/6_Performance/transpose/transpose_vs2019.vcxproj b/Samples/6_Performance/transpose/transpose_vs2019.vcxproj index 6cf4cb22..e4721874 100644 --- a/Samples/6_Performance/transpose/transpose_vs2019.vcxproj +++ b/Samples/6_Performance/transpose/transpose_vs2019.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/transpose.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - + diff --git a/Samples/6_Performance/transpose/transpose_vs2022.vcxproj b/Samples/6_Performance/transpose/transpose_vs2022.vcxproj index 3c7e497c..1db2b8cd 100644 --- 
a/Samples/6_Performance/transpose/transpose_vs2022.vcxproj +++ b/Samples/6_Performance/transpose/transpose_vs2022.vcxproj @@ -34,7 +34,7 @@ - + @@ -63,7 +63,7 @@ $(OutDir)/transpose.exe - compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86; + compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90; -Xcompiler "/wd 4819" --threads 0 ./;../../../Common WIN32 @@ -103,6 +103,6 @@ - +