diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000..a3062bea
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+.vscode/*
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4bd17b62..c619c923 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,10 @@
## Changelog
+### CUDA 11.6
+* Added new folder structure for samples
+* Added Visual Studio 2022 support for all samples supported on [Windows](#windows-1).
+* All CUDA samples are now only available on [GitHub](https://github.com/nvidia/cuda-samples). They are no longer available via the CUDA Toolkit.
+
### CUDA 11.5
* Added `cuDLAHybridMode`. Demonstrate usage of cuDLA in hybrid mode.
* Added `cuDLAStandaloneMode`. Demonstrate usage of cuDLA in standalone mode.
diff --git a/Common/helper_cuda.h b/Common/helper_cuda.h
index 98a5a7b6..f6bea97a 100644
--- a/Common/helper_cuda.h
+++ b/Common/helper_cuda.h
@@ -666,6 +666,7 @@ inline int _ConvertSMVer2Cores(int major, int minor) {
{0x80, 64},
{0x86, 128},
{0x87, 128},
+ {0x90, 128},
{-1, -1}};
int index = 0;
@@ -712,6 +713,8 @@ inline const char* _ConvertSMVer2ArchName(int major, int minor) {
{0x75, "Turing"},
{0x80, "Ampere"},
{0x86, "Ampere"},
+ {0x87, "Ampere"},
+ {0x90, "Hopper"},
{-1, "Graphics Device"}};
int index = 0;
diff --git a/Common/helper_cuda_drvapi.h b/Common/helper_cuda_drvapi.h
index f0362d64..80979b5b 100644
--- a/Common/helper_cuda_drvapi.h
+++ b/Common/helper_cuda_drvapi.h
@@ -114,6 +114,7 @@ inline int _ConvertSMVer2CoresDRV(int major, int minor) {
{0x80, 64},
{0x86, 128},
{0x87, 128},
+ {0x90, 128},
{-1, -1}};
int index = 0;
diff --git a/README.md b/README.md
index 7a37e198..354fa6a4 100644
--- a/README.md
+++ b/README.md
@@ -1,12 +1,12 @@
# CUDA Samples
-Samples for CUDA Developers which demonstrates features in CUDA Toolkit. This version supports [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads).
+Samples for CUDA developers that demonstrate features in the CUDA Toolkit. This version supports [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads).
## Release Notes
This section describes the release notes for the CUDA Samples on GitHub only.
-### CUDA 11.6
+### CUDA 11.8
* Added new folder structure for samples
* Added support of Visual Studio 2022 to all samples supported on [Windows](#windows-1).
* All CUDA samples are now only available on [GitHub](https://github.com/nvidia/cuda-samples). They are no longer available via CUDA toolkit.
@@ -17,7 +17,7 @@ This section describes the release notes for the CUDA Samples on GitHub only.
### Prerequisites
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
For system requirements and installation instructions of cuda toolkit, please refer to the [Linux Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/), and the [Windows Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-microsoft-windows/index.html).
### Getting the CUDA Samples
@@ -263,4 +263,4 @@ Answers to frequently asked questions about CUDA can be found at http://develope
## Attributions
-* Teapot image is obtained from [Wikimedia](https://en.wikipedia.org/wiki/File:Original_Utah_Teapot.jpg) and is licensed under the Creative Commons [Attribution-Share Alike 2.0](https://creativecommons.org/licenses/by-sa/2.0/deed.en) Generic license. The image is modified for samples use cases.
\ No newline at end of file
+* Teapot image is obtained from [Wikimedia](https://en.wikipedia.org/wiki/File:Original_Utah_Teapot.jpg) and is licensed under the Creative Commons [Attribution-Share Alike 2.0](https://creativecommons.org/licenses/by-sa/2.0/deed.en) Generic license. The image is modified for samples use cases.
diff --git a/Samples/0_Introduction/UnifiedMemoryStreams/Makefile b/Samples/0_Introduction/UnifiedMemoryStreams/Makefile
index ba7b78b4..e244dab5 100644
--- a/Samples/0_Introduction/UnifiedMemoryStreams/Makefile
+++ b/Samples/0_Introduction/UnifiedMemoryStreams/Makefile
@@ -318,9 +318,9 @@ endif
# Gencode arguments
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
endif
ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/UnifiedMemoryStreams/NsightEclipse.xml b/Samples/0_Introduction/UnifiedMemoryStreams/NsightEclipse.xml
index 033c1c50..744caa12 100644
--- a/Samples/0_Introduction/UnifiedMemoryStreams/NsightEclipse.xml
+++ b/Samples/0_Introduction/UnifiedMemoryStreams/NsightEclipse.xml
@@ -6,11 +6,11 @@
cudaStreamDestroy
cudaFree
cudaMallocManaged
- cudaStreamCreate
- cudaDeviceSynchronize
cudaStreamAttachMemAsync
cudaSetDevice
+ cudaDeviceSynchronize
cudaStreamSynchronize
+ cudaStreamCreate
cudaGetDeviceProperties
@@ -70,6 +70,7 @@
sm80
sm86
sm87
+ sm90
x86_64
diff --git a/Samples/0_Introduction/UnifiedMemoryStreams/README.md b/Samples/0_Introduction/UnifiedMemoryStreams/README.md
index 347649da..417cf3a0 100644
--- a/Samples/0_Introduction/UnifiedMemoryStreams/README.md
+++ b/Samples/0_Introduction/UnifiedMemoryStreams/README.md
@@ -10,7 +10,7 @@ CUDA Systems Integration, OpenMP, CUBLAS, Multithreading, Unified Memory, CUDA S
## Supported SM Architectures
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
## Supported OSes
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
## CUDA APIs involved
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaStreamDestroy, cudaFree, cudaMallocManaged, cudaStreamCreate, cudaDeviceSynchronize, cudaStreamAttachMemAsync, cudaSetDevice, cudaStreamSynchronize, cudaGetDeviceProperties
+cudaStreamDestroy, cudaFree, cudaMallocManaged, cudaStreamAttachMemAsync, cudaSetDevice, cudaDeviceSynchronize, cudaStreamSynchronize, cudaStreamCreate, cudaGetDeviceProperties
## Dependencies needed to build/run
[OpenMP](../../../README.md#openmp), [UVM](../../../README.md#uvm), [CUBLAS](../../../README.md#cublas)
## Prerequisites
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
## Build and Run
diff --git a/Samples/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams_vs2017.vcxproj b/Samples/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams_vs2017.vcxproj
index e5e99aac..9680c777 100644
--- a/Samples/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams_vs2017.vcxproj
+++ b/Samples/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams_vs2017.vcxproj
@@ -38,7 +38,7 @@
-
+
@@ -67,7 +67,7 @@
$(OutDir)/UnifiedMemoryStreams.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -108,6 +108,6 @@
-
+
diff --git a/Samples/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams_vs2019.vcxproj b/Samples/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams_vs2019.vcxproj
index 75e8d36d..866e26dc 100644
--- a/Samples/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams_vs2019.vcxproj
+++ b/Samples/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams_vs2019.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -63,7 +63,7 @@
$(OutDir)/UnifiedMemoryStreams.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -104,6 +104,6 @@
-
+
diff --git a/Samples/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams_vs2022.vcxproj b/Samples/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams_vs2022.vcxproj
index ba409655..07478748 100644
--- a/Samples/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams_vs2022.vcxproj
+++ b/Samples/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams_vs2022.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -63,7 +63,7 @@
$(OutDir)/UnifiedMemoryStreams.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -104,6 +104,6 @@
-
+
diff --git a/Samples/0_Introduction/asyncAPI/Makefile b/Samples/0_Introduction/asyncAPI/Makefile
index 885bbc8e..71bb4794 100644
--- a/Samples/0_Introduction/asyncAPI/Makefile
+++ b/Samples/0_Introduction/asyncAPI/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
# Gencode arguments
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
endif
ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/asyncAPI/NsightEclipse.xml b/Samples/0_Introduction/asyncAPI/NsightEclipse.xml
index 6d0bbc62..d823ac8a 100644
--- a/Samples/0_Introduction/asyncAPI/NsightEclipse.xml
+++ b/Samples/0_Introduction/asyncAPI/NsightEclipse.xml
@@ -3,21 +3,21 @@
asyncAPI
- cudaMemset
+ cudaProfilerStop
+ cudaMalloc
+ cudaMemcpyAsync
cudaFree
- cudaEventRecord
cudaMallocHost
cudaProfilerStart
- cudaEventCreate
- cudaEventElapsedTime
cudaDeviceSynchronize
+ cudaEventRecord
cudaFreeHost
- cudaMalloc
- cudaEventQuery
- cudaProfilerStop
+ cudaMemset
cudaEventDestroy
- cudaMemcpyAsync
+ cudaEventQuery
+ cudaEventElapsedTime
cudaGetDeviceProperties
+ cudaEventCreate
whole
@@ -59,6 +59,7 @@
sm80
sm86
sm87
+ sm90
x86_64
diff --git a/Samples/0_Introduction/asyncAPI/README.md b/Samples/0_Introduction/asyncAPI/README.md
index 81da4efc..7f4f3b42 100644
--- a/Samples/0_Introduction/asyncAPI/README.md
+++ b/Samples/0_Introduction/asyncAPI/README.md
@@ -10,7 +10,7 @@ Asynchronous Data Transfers, CUDA Streams and Events
## Supported SM Architectures
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
## Supported OSes
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
## CUDA APIs involved
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemset, cudaFree, cudaEventRecord, cudaMallocHost, cudaProfilerStart, cudaEventCreate, cudaEventElapsedTime, cudaDeviceSynchronize, cudaFreeHost, cudaMalloc, cudaEventQuery, cudaProfilerStop, cudaEventDestroy, cudaMemcpyAsync, cudaGetDeviceProperties
+cudaProfilerStop, cudaMalloc, cudaMemcpyAsync, cudaFree, cudaMallocHost, cudaProfilerStart, cudaDeviceSynchronize, cudaEventRecord, cudaFreeHost, cudaMemset, cudaEventDestroy, cudaEventQuery, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate
## Prerequisites
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
## Build and Run
diff --git a/Samples/0_Introduction/asyncAPI/asyncAPI_vs2017.vcxproj b/Samples/0_Introduction/asyncAPI/asyncAPI_vs2017.vcxproj
index f2de8d87..ccea698d 100644
--- a/Samples/0_Introduction/asyncAPI/asyncAPI_vs2017.vcxproj
+++ b/Samples/0_Introduction/asyncAPI/asyncAPI_vs2017.vcxproj
@@ -38,7 +38,7 @@
-
+
@@ -67,7 +67,7 @@
$(OutDir)/asyncAPI.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -107,6 +107,6 @@
-
+
diff --git a/Samples/0_Introduction/asyncAPI/asyncAPI_vs2019.vcxproj b/Samples/0_Introduction/asyncAPI/asyncAPI_vs2019.vcxproj
index be2679b8..56489567 100644
--- a/Samples/0_Introduction/asyncAPI/asyncAPI_vs2019.vcxproj
+++ b/Samples/0_Introduction/asyncAPI/asyncAPI_vs2019.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -63,7 +63,7 @@
$(OutDir)/asyncAPI.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -103,6 +103,6 @@
-
+
diff --git a/Samples/0_Introduction/asyncAPI/asyncAPI_vs2022.vcxproj b/Samples/0_Introduction/asyncAPI/asyncAPI_vs2022.vcxproj
index 9d98bdff..c4b23b8f 100644
--- a/Samples/0_Introduction/asyncAPI/asyncAPI_vs2022.vcxproj
+++ b/Samples/0_Introduction/asyncAPI/asyncAPI_vs2022.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -63,7 +63,7 @@
$(OutDir)/asyncAPI.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -103,6 +103,6 @@
-
+
diff --git a/Samples/0_Introduction/c++11_cuda/Makefile b/Samples/0_Introduction/c++11_cuda/Makefile
index f70e1ad6..d4c77f61 100644
--- a/Samples/0_Introduction/c++11_cuda/Makefile
+++ b/Samples/0_Introduction/c++11_cuda/Makefile
@@ -310,9 +310,9 @@ endif
# Gencode arguments
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
endif
ifeq ($(SMS),)
@@ -363,7 +363,6 @@ run: build
$(EXEC) ./c++11_cuda
testrun: build
- $(EXEC) ./c++11_cuda --dummy-test-param
clean:
rm -f c++11_cuda c++11_cuda.o
diff --git a/Samples/0_Introduction/c++11_cuda/NsightEclipse.xml b/Samples/0_Introduction/c++11_cuda/NsightEclipse.xml
index e9acaddc..ccb26ce1 100644
--- a/Samples/0_Introduction/c++11_cuda/NsightEclipse.xml
+++ b/Samples/0_Introduction/c++11_cuda/NsightEclipse.xml
@@ -7,9 +7,9 @@
cudaMalloc
+ cudaMemcpy
cudaMemset
cudaFree
- cudaMemcpy
whole
@@ -31,9 +31,6 @@
true
c++11_cuda.cu
-
- --dummy-test-param
-
CPP11
@@ -54,6 +51,7 @@
sm80
sm86
sm87
+ sm90
x86_64
diff --git a/Samples/0_Introduction/c++11_cuda/README.md b/Samples/0_Introduction/c++11_cuda/README.md
index 0ff9f23e..a889fb7c 100644
--- a/Samples/0_Introduction/c++11_cuda/README.md
+++ b/Samples/0_Introduction/c++11_cuda/README.md
@@ -10,7 +10,7 @@ CPP11 CUDA
## Supported SM Architectures
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
## Supported OSes
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
## CUDA APIs involved
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMalloc, cudaMemset, cudaFree, cudaMemcpy
+cudaMalloc, cudaMemcpy, cudaMemset, cudaFree
## Dependencies needed to build/run
[CPP11](../../../README.md#cpp11)
## Prerequisites
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
## Build and Run
diff --git a/Samples/0_Introduction/c++11_cuda/c++11_cuda_vs2017.vcxproj b/Samples/0_Introduction/c++11_cuda/c++11_cuda_vs2017.vcxproj
index 33d8ff13..705e575c 100644
--- a/Samples/0_Introduction/c++11_cuda/c++11_cuda_vs2017.vcxproj
+++ b/Samples/0_Introduction/c++11_cuda/c++11_cuda_vs2017.vcxproj
@@ -38,7 +38,7 @@
-
+
@@ -67,7 +67,7 @@
$(OutDir)/c++11_cuda.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -107,6 +107,6 @@
-
+
diff --git a/Samples/0_Introduction/c++11_cuda/c++11_cuda_vs2019.vcxproj b/Samples/0_Introduction/c++11_cuda/c++11_cuda_vs2019.vcxproj
index 3d1bc27d..e4e93dee 100644
--- a/Samples/0_Introduction/c++11_cuda/c++11_cuda_vs2019.vcxproj
+++ b/Samples/0_Introduction/c++11_cuda/c++11_cuda_vs2019.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -63,7 +63,7 @@
$(OutDir)/c++11_cuda.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -103,6 +103,6 @@
-
+
diff --git a/Samples/0_Introduction/c++11_cuda/c++11_cuda_vs2022.vcxproj b/Samples/0_Introduction/c++11_cuda/c++11_cuda_vs2022.vcxproj
index 9bf5532c..8133b615 100644
--- a/Samples/0_Introduction/c++11_cuda/c++11_cuda_vs2022.vcxproj
+++ b/Samples/0_Introduction/c++11_cuda/c++11_cuda_vs2022.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -63,7 +63,7 @@
$(OutDir)/c++11_cuda.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -103,6 +103,6 @@
-
+
diff --git a/Samples/0_Introduction/clock/Makefile b/Samples/0_Introduction/clock/Makefile
index dd832757..df4722cb 100644
--- a/Samples/0_Introduction/clock/Makefile
+++ b/Samples/0_Introduction/clock/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
# Gencode arguments
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
endif
ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/clock/NsightEclipse.xml b/Samples/0_Introduction/clock/NsightEclipse.xml
index eee903b9..6d8cfb09 100644
--- a/Samples/0_Introduction/clock/NsightEclipse.xml
+++ b/Samples/0_Introduction/clock/NsightEclipse.xml
@@ -4,8 +4,8 @@
clock
cudaMalloc
- cudaFree
cudaMemcpy
+ cudaFree
whole
@@ -47,6 +47,7 @@
sm80
sm86
sm87
+ sm90
x86_64
diff --git a/Samples/0_Introduction/clock/README.md b/Samples/0_Introduction/clock/README.md
index 98ffd744..11f9afd4 100644
--- a/Samples/0_Introduction/clock/README.md
+++ b/Samples/0_Introduction/clock/README.md
@@ -10,7 +10,7 @@ Performance Strategies
## Supported SM Architectures
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
## Supported OSes
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
## CUDA APIs involved
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMalloc, cudaFree, cudaMemcpy
+cudaMalloc, cudaMemcpy, cudaFree
## Prerequisites
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
## Build and Run
diff --git a/Samples/0_Introduction/clock/clock_vs2017.vcxproj b/Samples/0_Introduction/clock/clock_vs2017.vcxproj
index ba348ad3..2350f365 100644
--- a/Samples/0_Introduction/clock/clock_vs2017.vcxproj
+++ b/Samples/0_Introduction/clock/clock_vs2017.vcxproj
@@ -38,7 +38,7 @@
-
+
@@ -67,7 +67,7 @@
$(OutDir)/clock.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -107,6 +107,6 @@
-
+
diff --git a/Samples/0_Introduction/clock/clock_vs2019.vcxproj b/Samples/0_Introduction/clock/clock_vs2019.vcxproj
index a20c90b7..6649beca 100644
--- a/Samples/0_Introduction/clock/clock_vs2019.vcxproj
+++ b/Samples/0_Introduction/clock/clock_vs2019.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -63,7 +63,7 @@
$(OutDir)/clock.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -103,6 +103,6 @@
-
+
diff --git a/Samples/0_Introduction/clock/clock_vs2022.vcxproj b/Samples/0_Introduction/clock/clock_vs2022.vcxproj
index 10e92347..4cf6b895 100644
--- a/Samples/0_Introduction/clock/clock_vs2022.vcxproj
+++ b/Samples/0_Introduction/clock/clock_vs2022.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -63,7 +63,7 @@
$(OutDir)/clock.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -103,6 +103,6 @@
-
+
diff --git a/Samples/0_Introduction/clock_nvrtc/README.md b/Samples/0_Introduction/clock_nvrtc/README.md
index 8f16c6d2..5e1dbf0f 100644
--- a/Samples/0_Introduction/clock_nvrtc/README.md
+++ b/Samples/0_Introduction/clock_nvrtc/README.md
@@ -10,7 +10,7 @@ Performance Strategies, Runtime Compilation
## Supported SM Architectures
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
## Supported OSes
@@ -23,7 +23,7 @@ x86_64, ppc64le, aarch64
## CUDA APIs involved
### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
-cuModuleGetFunction, cuMemAlloc, cuLaunchKernel, cuCtxSynchronize, cuMemFree, cuMemcpyDtoH, cuMemcpyHtoD
+cuMemcpyDtoH, cuLaunchKernel, cuMemcpyHtoD, cuCtxSynchronize, cuMemAlloc, cuMemFree, cuModuleGetFunction
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
cudaBlockSize, cudaGridSize
@@ -33,7 +33,7 @@ cudaBlockSize, cudaGridSize
## Prerequisites
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
## Build and Run
diff --git a/Samples/0_Introduction/clock_nvrtc/clock_nvrtc_vs2017.vcxproj b/Samples/0_Introduction/clock_nvrtc/clock_nvrtc_vs2017.vcxproj
index 03b11e36..ec582a9f 100644
--- a/Samples/0_Introduction/clock_nvrtc/clock_nvrtc_vs2017.vcxproj
+++ b/Samples/0_Introduction/clock_nvrtc/clock_nvrtc_vs2017.vcxproj
@@ -38,7 +38,7 @@
-
+
@@ -107,6 +107,6 @@
-
+
diff --git a/Samples/0_Introduction/clock_nvrtc/clock_nvrtc_vs2019.vcxproj b/Samples/0_Introduction/clock_nvrtc/clock_nvrtc_vs2019.vcxproj
index 80f3f59d..e5b93b60 100644
--- a/Samples/0_Introduction/clock_nvrtc/clock_nvrtc_vs2019.vcxproj
+++ b/Samples/0_Introduction/clock_nvrtc/clock_nvrtc_vs2019.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -103,6 +103,6 @@
-
+
diff --git a/Samples/0_Introduction/clock_nvrtc/clock_nvrtc_vs2022.vcxproj b/Samples/0_Introduction/clock_nvrtc/clock_nvrtc_vs2022.vcxproj
index 0cf812f4..825d8e05 100644
--- a/Samples/0_Introduction/clock_nvrtc/clock_nvrtc_vs2022.vcxproj
+++ b/Samples/0_Introduction/clock_nvrtc/clock_nvrtc_vs2022.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -103,6 +103,6 @@
-
+
diff --git a/Samples/0_Introduction/concurrentKernels/Makefile b/Samples/0_Introduction/concurrentKernels/Makefile
index 0073ee5e..e6e4e241 100644
--- a/Samples/0_Introduction/concurrentKernels/Makefile
+++ b/Samples/0_Introduction/concurrentKernels/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
# Gencode arguments
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
endif
ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/concurrentKernels/NsightEclipse.xml b/Samples/0_Introduction/concurrentKernels/NsightEclipse.xml
index dd564b83..edfb7ff5 100644
--- a/Samples/0_Introduction/concurrentKernels/NsightEclipse.xml
+++ b/Samples/0_Introduction/concurrentKernels/NsightEclipse.xml
@@ -3,22 +3,22 @@
concurrentKernels
- cudaStreamWaitEvent
cudaStreamDestroy
- cudaFree
- cudaEventRecord
- cudaMallocHost
- cudaStreamCreate
- cudaEventCreate
- cudaEventElapsedTime
- cudaEventSynchronize
- cudaFreeHost
cudaMalloc
- cudaEventCreateWithFlags
- cudaEventDestroy
cudaMemcpyAsync
- cudaGetDeviceProperties
+ cudaFree
+ cudaMallocHost
+ cudaEventCreateWithFlags
+ cudaEventSynchronize
+ cudaEventRecord
+ cudaFreeHost
cudaGetDevice
+ cudaStreamWaitEvent
+ cudaEventDestroy
+ cudaEventElapsedTime
+ cudaStreamCreate
+ cudaGetDeviceProperties
+ cudaEventCreate
whole
@@ -57,6 +57,7 @@
sm80
sm86
sm87
+ sm90
x86_64
diff --git a/Samples/0_Introduction/concurrentKernels/README.md b/Samples/0_Introduction/concurrentKernels/README.md
index 96816ffa..f83e3bdd 100644
--- a/Samples/0_Introduction/concurrentKernels/README.md
+++ b/Samples/0_Introduction/concurrentKernels/README.md
@@ -10,7 +10,7 @@ Performance Strategies
## Supported SM Architectures
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
## Supported OSes
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
## CUDA APIs involved
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaStreamWaitEvent, cudaStreamDestroy, cudaFree, cudaEventRecord, cudaMallocHost, cudaStreamCreate, cudaEventCreate, cudaEventElapsedTime, cudaEventSynchronize, cudaFreeHost, cudaMalloc, cudaEventCreateWithFlags, cudaEventDestroy, cudaMemcpyAsync, cudaGetDeviceProperties, cudaGetDevice
+cudaStreamDestroy, cudaMalloc, cudaMemcpyAsync, cudaFree, cudaMallocHost, cudaEventCreateWithFlags, cudaEventSynchronize, cudaEventRecord, cudaFreeHost, cudaGetDevice, cudaStreamWaitEvent, cudaEventDestroy, cudaEventElapsedTime, cudaStreamCreate, cudaGetDeviceProperties, cudaEventCreate
## Prerequisites
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
## Build and Run
diff --git a/Samples/0_Introduction/concurrentKernels/concurrentKernels_vs2017.vcxproj b/Samples/0_Introduction/concurrentKernels/concurrentKernels_vs2017.vcxproj
index bb646789..59cad7eb 100644
--- a/Samples/0_Introduction/concurrentKernels/concurrentKernels_vs2017.vcxproj
+++ b/Samples/0_Introduction/concurrentKernels/concurrentKernels_vs2017.vcxproj
@@ -38,7 +38,7 @@
-
+
@@ -67,7 +67,7 @@
$(OutDir)/concurrentKernels.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -107,6 +107,6 @@
-
+
diff --git a/Samples/0_Introduction/concurrentKernels/concurrentKernels_vs2019.vcxproj b/Samples/0_Introduction/concurrentKernels/concurrentKernels_vs2019.vcxproj
index 2830f310..faee059d 100644
--- a/Samples/0_Introduction/concurrentKernels/concurrentKernels_vs2019.vcxproj
+++ b/Samples/0_Introduction/concurrentKernels/concurrentKernels_vs2019.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -63,7 +63,7 @@
$(OutDir)/concurrentKernels.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -103,6 +103,6 @@
-
+
diff --git a/Samples/0_Introduction/concurrentKernels/concurrentKernels_vs2022.vcxproj b/Samples/0_Introduction/concurrentKernels/concurrentKernels_vs2022.vcxproj
index a528a3fb..abf2d5e0 100644
--- a/Samples/0_Introduction/concurrentKernels/concurrentKernels_vs2022.vcxproj
+++ b/Samples/0_Introduction/concurrentKernels/concurrentKernels_vs2022.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -63,7 +63,7 @@
$(OutDir)/concurrentKernels.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -103,6 +103,6 @@
-
+
diff --git a/Samples/0_Introduction/cppIntegration/Makefile b/Samples/0_Introduction/cppIntegration/Makefile
index 19301286..ebe106e2 100644
--- a/Samples/0_Introduction/cppIntegration/Makefile
+++ b/Samples/0_Introduction/cppIntegration/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
# Gencode arguments
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
endif
ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/cppIntegration/NsightEclipse.xml b/Samples/0_Introduction/cppIntegration/NsightEclipse.xml
index 9f70719b..9b5f9b41 100644
--- a/Samples/0_Introduction/cppIntegration/NsightEclipse.xml
+++ b/Samples/0_Introduction/cppIntegration/NsightEclipse.xml
@@ -4,8 +4,8 @@
cppIntegration
cudaMalloc
- cudaFree
cudaMemcpy
+ cudaFree
whole
@@ -41,6 +41,7 @@
sm80
sm86
sm87
+ sm90
x86_64
diff --git a/Samples/0_Introduction/cppIntegration/README.md b/Samples/0_Introduction/cppIntegration/README.md
index 2ba64fd7..4ac48bcf 100644
--- a/Samples/0_Introduction/cppIntegration/README.md
+++ b/Samples/0_Introduction/cppIntegration/README.md
@@ -10,7 +10,7 @@ CPP-CUDA Integration
## Supported SM Architectures
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
## Supported OSes
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
## CUDA APIs involved
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMalloc, cudaFree, cudaMemcpy
+cudaMalloc, cudaMemcpy, cudaFree
## Prerequisites
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
## Build and Run
diff --git a/Samples/0_Introduction/cppIntegration/cppIntegration_vs2017.vcxproj b/Samples/0_Introduction/cppIntegration/cppIntegration_vs2017.vcxproj
index e6846211..4070ae91 100644
--- a/Samples/0_Introduction/cppIntegration/cppIntegration_vs2017.vcxproj
+++ b/Samples/0_Introduction/cppIntegration/cppIntegration_vs2017.vcxproj
@@ -38,7 +38,7 @@
-
+
@@ -67,7 +67,7 @@
$(OutDir)/cppIntegration.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -109,6 +109,6 @@
-
+
diff --git a/Samples/0_Introduction/cppIntegration/cppIntegration_vs2019.vcxproj b/Samples/0_Introduction/cppIntegration/cppIntegration_vs2019.vcxproj
index a57aa19b..67d587aa 100644
--- a/Samples/0_Introduction/cppIntegration/cppIntegration_vs2019.vcxproj
+++ b/Samples/0_Introduction/cppIntegration/cppIntegration_vs2019.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -63,7 +63,7 @@
$(OutDir)/cppIntegration.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -105,6 +105,6 @@
-
+
diff --git a/Samples/0_Introduction/cppIntegration/cppIntegration_vs2022.vcxproj b/Samples/0_Introduction/cppIntegration/cppIntegration_vs2022.vcxproj
index 26e82803..8ed0d991 100644
--- a/Samples/0_Introduction/cppIntegration/cppIntegration_vs2022.vcxproj
+++ b/Samples/0_Introduction/cppIntegration/cppIntegration_vs2022.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -63,7 +63,7 @@
$(OutDir)/cppIntegration.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -105,6 +105,6 @@
-
+
diff --git a/Samples/0_Introduction/cppOverload/Makefile b/Samples/0_Introduction/cppOverload/Makefile
index cfd5ec9c..a76aca05 100644
--- a/Samples/0_Introduction/cppOverload/Makefile
+++ b/Samples/0_Introduction/cppOverload/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
# Gencode arguments
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
endif
ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/cppOverload/NsightEclipse.xml b/Samples/0_Introduction/cppOverload/NsightEclipse.xml
index 8c5b84a7..9ad898be 100644
--- a/Samples/0_Introduction/cppOverload/NsightEclipse.xml
+++ b/Samples/0_Introduction/cppOverload/NsightEclipse.xml
@@ -3,17 +3,17 @@
cppOverload
- cudaFree
+ cudaMemcpy
cudaFuncSetCacheConfig
+ cudaFree
cudaMallocHost
- cudaFuncGetAttributes
- cudaGetDeviceCount
+ cudaSetDevice
+ cudaGetDeviceProperties
cudaDeviceSynchronize
cudaFreeHost
cudaMalloc
- cudaSetDevice
- cudaMemcpy
- cudaGetDeviceProperties
+ cudaFuncGetAttributes
+ cudaGetDeviceCount
whole
@@ -52,6 +52,7 @@
sm80
sm86
sm87
+ sm90
x86_64
diff --git a/Samples/0_Introduction/cppOverload/README.md b/Samples/0_Introduction/cppOverload/README.md
index 16b6adc1..bc583bfd 100644
--- a/Samples/0_Introduction/cppOverload/README.md
+++ b/Samples/0_Introduction/cppOverload/README.md
@@ -10,7 +10,7 @@ C++ Function Overloading, CUDA Streams and Events
## Supported SM Architectures
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
## Supported OSes
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
## CUDA APIs involved
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaFuncSetCacheConfig, cudaMallocHost, cudaFuncGetAttributes, cudaGetDeviceCount, cudaDeviceSynchronize, cudaFreeHost, cudaMalloc, cudaSetDevice, cudaMemcpy, cudaGetDeviceProperties
+cudaMemcpy, cudaFuncSetCacheConfig, cudaFree, cudaMallocHost, cudaSetDevice, cudaGetDeviceProperties, cudaDeviceSynchronize, cudaFreeHost, cudaMalloc, cudaFuncGetAttributes, cudaGetDeviceCount
## Prerequisites
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
## Build and Run
diff --git a/Samples/0_Introduction/cppOverload/cppOverload_vs2017.vcxproj b/Samples/0_Introduction/cppOverload/cppOverload_vs2017.vcxproj
index 0a082724..4adb6ea7 100644
--- a/Samples/0_Introduction/cppOverload/cppOverload_vs2017.vcxproj
+++ b/Samples/0_Introduction/cppOverload/cppOverload_vs2017.vcxproj
@@ -38,7 +38,7 @@
-
+
@@ -67,7 +67,7 @@
$(OutDir)/cppOverload.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -107,6 +107,6 @@
-
+
diff --git a/Samples/0_Introduction/cppOverload/cppOverload_vs2019.vcxproj b/Samples/0_Introduction/cppOverload/cppOverload_vs2019.vcxproj
index ba8bee01..040f08cf 100644
--- a/Samples/0_Introduction/cppOverload/cppOverload_vs2019.vcxproj
+++ b/Samples/0_Introduction/cppOverload/cppOverload_vs2019.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -63,7 +63,7 @@
$(OutDir)/cppOverload.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -103,6 +103,6 @@
-
+
diff --git a/Samples/0_Introduction/cppOverload/cppOverload_vs2022.vcxproj b/Samples/0_Introduction/cppOverload/cppOverload_vs2022.vcxproj
index 4e849ea5..a9592ff8 100644
--- a/Samples/0_Introduction/cppOverload/cppOverload_vs2022.vcxproj
+++ b/Samples/0_Introduction/cppOverload/cppOverload_vs2022.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -63,7 +63,7 @@
$(OutDir)/cppOverload.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -103,6 +103,6 @@
-
+
diff --git a/Samples/0_Introduction/cudaOpenMP/Makefile b/Samples/0_Introduction/cudaOpenMP/Makefile
index 476ddbc9..277357e2 100644
--- a/Samples/0_Introduction/cudaOpenMP/Makefile
+++ b/Samples/0_Introduction/cudaOpenMP/Makefile
@@ -321,9 +321,9 @@ endif
# Gencode arguments
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
endif
ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/cudaOpenMP/README.md b/Samples/0_Introduction/cudaOpenMP/README.md
index 9f446d04..5a57d918 100644
--- a/Samples/0_Introduction/cudaOpenMP/README.md
+++ b/Samples/0_Introduction/cudaOpenMP/README.md
@@ -10,7 +10,7 @@ CUDA Systems Integration, OpenMP, Multithreading
## Supported SM Architectures
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
## Supported OSes
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
## CUDA APIs involved
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemset, cudaFree, cudaGetDeviceCount, cudaSetDevice, cudaMalloc, cudaGetLastError, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties, cudaGetDevice
+cudaMemcpy, cudaGetErrorString, cudaFree, cudaGetLastError, cudaSetDevice, cudaGetDeviceCount, cudaGetDevice, cudaMemset, cudaMalloc, cudaGetDeviceProperties
## Dependencies needed to build/run
[OpenMP](../../../README.md#openmp)
## Prerequisites
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
## Build and Run
diff --git a/Samples/0_Introduction/cudaOpenMP/cudaOpenMP_vs2017.vcxproj b/Samples/0_Introduction/cudaOpenMP/cudaOpenMP_vs2017.vcxproj
index 57636e0c..b6a822e0 100644
--- a/Samples/0_Introduction/cudaOpenMP/cudaOpenMP_vs2017.vcxproj
+++ b/Samples/0_Introduction/cudaOpenMP/cudaOpenMP_vs2017.vcxproj
@@ -38,7 +38,7 @@
-
+
@@ -67,7 +67,7 @@
$(OutDir)/cudaOpenMP.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -108,6 +108,6 @@
-
+
diff --git a/Samples/0_Introduction/cudaOpenMP/cudaOpenMP_vs2019.vcxproj b/Samples/0_Introduction/cudaOpenMP/cudaOpenMP_vs2019.vcxproj
index 5e7d2b50..991ca21e 100644
--- a/Samples/0_Introduction/cudaOpenMP/cudaOpenMP_vs2019.vcxproj
+++ b/Samples/0_Introduction/cudaOpenMP/cudaOpenMP_vs2019.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -63,7 +63,7 @@
$(OutDir)/cudaOpenMP.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -104,6 +104,6 @@
-
+
diff --git a/Samples/0_Introduction/cudaOpenMP/cudaOpenMP_vs2022.vcxproj b/Samples/0_Introduction/cudaOpenMP/cudaOpenMP_vs2022.vcxproj
index 8628c83b..adf14793 100644
--- a/Samples/0_Introduction/cudaOpenMP/cudaOpenMP_vs2022.vcxproj
+++ b/Samples/0_Introduction/cudaOpenMP/cudaOpenMP_vs2022.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -63,7 +63,7 @@
$(OutDir)/cudaOpenMP.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -104,6 +104,6 @@
-
+
diff --git a/Samples/0_Introduction/fp16ScalarProduct/Makefile b/Samples/0_Introduction/fp16ScalarProduct/Makefile
index c80fe84a..5dda1a89 100644
--- a/Samples/0_Introduction/fp16ScalarProduct/Makefile
+++ b/Samples/0_Introduction/fp16ScalarProduct/Makefile
@@ -285,9 +285,9 @@ LIBRARIES :=
# Gencode arguments
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
else
-SMS ?= 60 61 70 75 80 86
+SMS ?= 60 61 70 75 80 86 90
endif
ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/fp16ScalarProduct/NsightEclipse.xml b/Samples/0_Introduction/fp16ScalarProduct/NsightEclipse.xml
index fed67440..045bce43 100644
--- a/Samples/0_Introduction/fp16ScalarProduct/NsightEclipse.xml
+++ b/Samples/0_Introduction/fp16ScalarProduct/NsightEclipse.xml
@@ -3,11 +3,11 @@
fp16ScalarProduct
+ cudaMemcpy
cudaFree
cudaMallocHost
cudaFreeHost
cudaMalloc
- cudaMemcpy
cudaGetDeviceProperties
@@ -44,6 +44,7 @@
sm80
sm86
sm87
+ sm90
arm
diff --git a/Samples/0_Introduction/fp16ScalarProduct/README.md b/Samples/0_Introduction/fp16ScalarProduct/README.md
index 3875a40a..4aa2b89c 100644
--- a/Samples/0_Introduction/fp16ScalarProduct/README.md
+++ b/Samples/0_Introduction/fp16ScalarProduct/README.md
@@ -10,7 +10,7 @@ CUDA Runtime API
## Supported SM Architectures
-[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
## Supported OSes
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
## CUDA APIs involved
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaMallocHost, cudaFreeHost, cudaMalloc, cudaMemcpy, cudaGetDeviceProperties
+cudaMemcpy, cudaFree, cudaMallocHost, cudaFreeHost, cudaMalloc, cudaGetDeviceProperties
## Dependencies needed to build/run
[FP16](../../../README.md#fp16)
## Prerequisites
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
## Build and Run
diff --git a/Samples/0_Introduction/fp16ScalarProduct/fp16ScalarProduct_vs2017.vcxproj b/Samples/0_Introduction/fp16ScalarProduct/fp16ScalarProduct_vs2017.vcxproj
index f1199a8e..c4dbdc75 100644
--- a/Samples/0_Introduction/fp16ScalarProduct/fp16ScalarProduct_vs2017.vcxproj
+++ b/Samples/0_Introduction/fp16ScalarProduct/fp16ScalarProduct_vs2017.vcxproj
@@ -38,7 +38,7 @@
-
+
@@ -67,7 +67,7 @@
$(OutDir)/fp16ScalarProduct.exe
- compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -107,6 +107,6 @@
-
+
diff --git a/Samples/0_Introduction/fp16ScalarProduct/fp16ScalarProduct_vs2019.vcxproj b/Samples/0_Introduction/fp16ScalarProduct/fp16ScalarProduct_vs2019.vcxproj
index bcd5c50c..0b9a749f 100644
--- a/Samples/0_Introduction/fp16ScalarProduct/fp16ScalarProduct_vs2019.vcxproj
+++ b/Samples/0_Introduction/fp16ScalarProduct/fp16ScalarProduct_vs2019.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -63,7 +63,7 @@
$(OutDir)/fp16ScalarProduct.exe
- compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -103,6 +103,6 @@
-
+
diff --git a/Samples/0_Introduction/fp16ScalarProduct/fp16ScalarProduct_vs2022.vcxproj b/Samples/0_Introduction/fp16ScalarProduct/fp16ScalarProduct_vs2022.vcxproj
index c316ee62..ee4258a8 100644
--- a/Samples/0_Introduction/fp16ScalarProduct/fp16ScalarProduct_vs2022.vcxproj
+++ b/Samples/0_Introduction/fp16ScalarProduct/fp16ScalarProduct_vs2022.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -63,7 +63,7 @@
$(OutDir)/fp16ScalarProduct.exe
- compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -103,6 +103,6 @@
-
+
diff --git a/Samples/0_Introduction/matrixMul/Makefile b/Samples/0_Introduction/matrixMul/Makefile
index a8b38ccd..a4d336b5 100644
--- a/Samples/0_Introduction/matrixMul/Makefile
+++ b/Samples/0_Introduction/matrixMul/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
# Gencode arguments
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
endif
ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/matrixMul/NsightEclipse.xml b/Samples/0_Introduction/matrixMul/NsightEclipse.xml
index e90b6c7e..3f517967 100644
--- a/Samples/0_Introduction/matrixMul/NsightEclipse.xml
+++ b/Samples/0_Introduction/matrixMul/NsightEclipse.xml
@@ -3,20 +3,20 @@
matrixMul
+ cudaStreamCreateWithFlags
+ cudaProfilerStop
+ cudaMalloc
cudaFree
- cudaEventRecord
cudaMallocHost
cudaProfilerStart
- cudaEventCreate
- cudaEventElapsedTime
cudaEventSynchronize
+ cudaEventRecord
cudaFreeHost
- cudaMalloc
- cudaProfilerStop
- cudaStreamCreateWithFlags
- cudaEventDestroy
cudaStreamSynchronize
+ cudaEventDestroy
+ cudaEventElapsedTime
cudaMemcpyAsync
+ cudaEventCreate
whole
@@ -56,6 +56,7 @@
sm80
sm86
sm87
+ sm90
x86_64
diff --git a/Samples/0_Introduction/matrixMul/README.md b/Samples/0_Introduction/matrixMul/README.md
index c558141a..b0e121b2 100644
--- a/Samples/0_Introduction/matrixMul/README.md
+++ b/Samples/0_Introduction/matrixMul/README.md
@@ -10,7 +10,7 @@ CUDA Runtime API, Linear Algebra
## Supported SM Architectures
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
## Supported OSes
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l, aarch64
## CUDA APIs involved
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaEventRecord, cudaMallocHost, cudaProfilerStart, cudaEventCreate, cudaEventElapsedTime, cudaEventSynchronize, cudaFreeHost, cudaMalloc, cudaProfilerStop, cudaStreamCreateWithFlags, cudaEventDestroy, cudaStreamSynchronize, cudaMemcpyAsync
+cudaStreamCreateWithFlags, cudaProfilerStop, cudaMalloc, cudaFree, cudaMallocHost, cudaProfilerStart, cudaEventSynchronize, cudaEventRecord, cudaFreeHost, cudaStreamSynchronize, cudaEventDestroy, cudaEventElapsedTime, cudaMemcpyAsync, cudaEventCreate
## Prerequisites
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
## Build and Run
diff --git a/Samples/0_Introduction/matrixMul/matrixMul_vs2017.vcxproj b/Samples/0_Introduction/matrixMul/matrixMul_vs2017.vcxproj
index 5bc23eb0..95f6a03a 100644
--- a/Samples/0_Introduction/matrixMul/matrixMul_vs2017.vcxproj
+++ b/Samples/0_Introduction/matrixMul/matrixMul_vs2017.vcxproj
@@ -38,7 +38,7 @@
-
+
@@ -67,7 +67,7 @@
$(OutDir)/matrixMul.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -107,6 +107,6 @@
-
+
diff --git a/Samples/0_Introduction/matrixMul/matrixMul_vs2019.vcxproj b/Samples/0_Introduction/matrixMul/matrixMul_vs2019.vcxproj
index 7373d385..375f668a 100644
--- a/Samples/0_Introduction/matrixMul/matrixMul_vs2019.vcxproj
+++ b/Samples/0_Introduction/matrixMul/matrixMul_vs2019.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -63,7 +63,7 @@
$(OutDir)/matrixMul.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -103,6 +103,6 @@
-
+
diff --git a/Samples/0_Introduction/matrixMul/matrixMul_vs2022.vcxproj b/Samples/0_Introduction/matrixMul/matrixMul_vs2022.vcxproj
index 36e7c4e9..e406cc03 100644
--- a/Samples/0_Introduction/matrixMul/matrixMul_vs2022.vcxproj
+++ b/Samples/0_Introduction/matrixMul/matrixMul_vs2022.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -63,7 +63,7 @@
$(OutDir)/matrixMul.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -103,6 +103,6 @@
-
+
diff --git a/Samples/0_Introduction/matrixMulDrv/Makefile b/Samples/0_Introduction/matrixMulDrv/Makefile
index 794345b6..83476982 100644
--- a/Samples/0_Introduction/matrixMulDrv/Makefile
+++ b/Samples/0_Introduction/matrixMulDrv/Makefile
@@ -283,9 +283,9 @@ FATBIN_FILE := matrixMul_kernel${TARGET_SIZE}.fatbin
# Gencode arguments
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
endif
ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/matrixMulDrv/README.md b/Samples/0_Introduction/matrixMulDrv/README.md
index 804e7d81..682fb940 100644
--- a/Samples/0_Introduction/matrixMulDrv/README.md
+++ b/Samples/0_Introduction/matrixMulDrv/README.md
@@ -10,7 +10,7 @@ CUDA Driver API, Matrix Multiply
## Supported SM Architectures
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
## Supported OSes
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l, aarch64
## CUDA APIs involved
### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
-cuModuleGetFunction, cuMemcpyHtoD, cuModuleLoadData, cuCtxCreate, cuLaunchKernel, cuDeviceGetName, cuMemAlloc, cuOccupancyMaxPotentialBlockSize, cuDeviceTotalMem, cuMemFree, cuMemcpyDtoH, cuCtxDestroy, cuDeviceGetAttribute
+cuMemcpyDtoH, cuLaunchKernel, cuMemcpyHtoD, cuDeviceGetName, cuDeviceTotalMem, cuDeviceGetAttribute, cuModuleLoadData, cuOccupancyMaxPotentialBlockSize, cuMemAlloc, cuMemFree, cuCtxDestroy, cuModuleGetFunction, cuCtxCreate
## Prerequisites
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
## Build and Run
diff --git a/Samples/0_Introduction/matrixMulDrv/matrixMulDrv_vs2017.vcxproj b/Samples/0_Introduction/matrixMulDrv/matrixMulDrv_vs2017.vcxproj
index ff911452..73998761 100644
--- a/Samples/0_Introduction/matrixMulDrv/matrixMulDrv_vs2017.vcxproj
+++ b/Samples/0_Introduction/matrixMulDrv/matrixMulDrv_vs2017.vcxproj
@@ -38,7 +38,7 @@
-
+
@@ -67,7 +67,7 @@
$(OutDir)/matrixMulDrv.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -111,6 +111,6 @@
-
+
diff --git a/Samples/0_Introduction/matrixMulDrv/matrixMulDrv_vs2019.vcxproj b/Samples/0_Introduction/matrixMulDrv/matrixMulDrv_vs2019.vcxproj
index 1f6b88ad..0805c97c 100644
--- a/Samples/0_Introduction/matrixMulDrv/matrixMulDrv_vs2019.vcxproj
+++ b/Samples/0_Introduction/matrixMulDrv/matrixMulDrv_vs2019.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -63,7 +63,7 @@
$(OutDir)/matrixMulDrv.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -107,6 +107,6 @@
-
+
diff --git a/Samples/0_Introduction/matrixMulDrv/matrixMulDrv_vs2022.vcxproj b/Samples/0_Introduction/matrixMulDrv/matrixMulDrv_vs2022.vcxproj
index e92ce9ce..a82bb699 100644
--- a/Samples/0_Introduction/matrixMulDrv/matrixMulDrv_vs2022.vcxproj
+++ b/Samples/0_Introduction/matrixMulDrv/matrixMulDrv_vs2022.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -63,7 +63,7 @@
$(OutDir)/matrixMulDrv.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -107,6 +107,6 @@
-
+
diff --git a/Samples/0_Introduction/matrixMulDynlinkJIT/README.md b/Samples/0_Introduction/matrixMulDynlinkJIT/README.md
index 18350964..657811d3 100644
--- a/Samples/0_Introduction/matrixMulDynlinkJIT/README.md
+++ b/Samples/0_Introduction/matrixMulDynlinkJIT/README.md
@@ -10,7 +10,7 @@ CUDA Driver API, CUDA Dynamically Linked Library
## Supported SM Architectures
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
## Supported OSes
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
## CUDA APIs involved
### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
-cuParamSetv, cuMemFree, cuInit, cuModuleGetFunction, cuCtxDestroy, cuCtxCreate, cuDeviceGetName, cuCtxSynchronize, cuParamSeti, cuModuleLoadDataEx, cuDeviceGet, cuFuncSetSharedSize, cuMemAlloc, cuDeviceComputeCapability, cuFuncSetBlockShape, cuMemcpyHtoD, cuParamSetSize, cuLaunchGrid, cuDeviceGetCount, cuLaunchKernel, cuMemcpyDtoH
+cuMemcpyDtoH, cuDeviceGetName, cuParamSeti, cuModuleLoadDataEx, cuModuleGetFunction, cuLaunchGrid, cuFuncSetSharedSize, cuMemFree, cuParamSetSize, cuParamSetv, cuInit, cuMemcpyHtoD, cuLaunchKernel, cuDeviceGet, cuFuncSetBlockShape, cuCtxDestroy, cuDeviceGetCount, cuDeviceComputeCapability, cuCtxSynchronize, cuMemAlloc, cuCtxCreate
## Prerequisites
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
## Build and Run
diff --git a/Samples/0_Introduction/matrixMulDynlinkJIT/helper_cuda_drvapi.h b/Samples/0_Introduction/matrixMulDynlinkJIT/helper_cuda_drvapi.h
index 5f69d332..4ca66fde 100644
--- a/Samples/0_Introduction/matrixMulDynlinkJIT/helper_cuda_drvapi.h
+++ b/Samples/0_Introduction/matrixMulDynlinkJIT/helper_cuda_drvapi.h
@@ -95,6 +95,7 @@ inline int _ConvertSMVer2CoresDRV(int major, int minor) {
{0x80, 64},
{0x86, 128},
{0x87, 128},
+ {0x90, 128},
{-1, -1}};
int index = 0;
diff --git a/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMulDynlinkJIT_vs2017.vcxproj b/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMulDynlinkJIT_vs2017.vcxproj
index da13462f..8b146bde 100644
--- a/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMulDynlinkJIT_vs2017.vcxproj
+++ b/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMulDynlinkJIT_vs2017.vcxproj
@@ -38,7 +38,7 @@
-
+
@@ -116,6 +116,6 @@
-
+
diff --git a/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMulDynlinkJIT_vs2019.vcxproj b/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMulDynlinkJIT_vs2019.vcxproj
index 460bc3de..3fc6842c 100644
--- a/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMulDynlinkJIT_vs2019.vcxproj
+++ b/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMulDynlinkJIT_vs2019.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -112,6 +112,6 @@
-
+
diff --git a/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMulDynlinkJIT_vs2022.vcxproj b/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMulDynlinkJIT_vs2022.vcxproj
index d5ac5358..732e0b22 100644
--- a/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMulDynlinkJIT_vs2022.vcxproj
+++ b/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMulDynlinkJIT_vs2022.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -112,6 +112,6 @@
-
+
diff --git a/Samples/0_Introduction/matrixMul_nvrtc/README.md b/Samples/0_Introduction/matrixMul_nvrtc/README.md
index 2cefe20e..224c3ee0 100644
--- a/Samples/0_Introduction/matrixMul_nvrtc/README.md
+++ b/Samples/0_Introduction/matrixMul_nvrtc/README.md
@@ -10,7 +10,7 @@ CUDA Runtime API, Linear Algebra, Runtime Compilation
## Supported SM Architectures
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
## Supported OSes
@@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64
## CUDA APIs involved
### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
-cuModuleGetFunction, cuMemAlloc, cuLaunchKernel, cuCtxSynchronize, cuMemFree, cuMemcpyDtoH, cuMemcpyHtoD
+cuMemcpyDtoH, cuLaunchKernel, cuMemcpyHtoD, cuCtxSynchronize, cuMemAlloc, cuMemFree, cuModuleGetFunction
## Dependencies needed to build/run
[NVRTC](../../../README.md#nvrtc)
## Prerequisites
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
## Build and Run
diff --git a/Samples/0_Introduction/matrixMul_nvrtc/matrixMul_nvrtc_vs2017.vcxproj b/Samples/0_Introduction/matrixMul_nvrtc/matrixMul_nvrtc_vs2017.vcxproj
index 1b4a7eb0..7833bb47 100644
--- a/Samples/0_Introduction/matrixMul_nvrtc/matrixMul_nvrtc_vs2017.vcxproj
+++ b/Samples/0_Introduction/matrixMul_nvrtc/matrixMul_nvrtc_vs2017.vcxproj
@@ -38,7 +38,7 @@
-
+
@@ -113,6 +113,6 @@ xcopy /y /e /s "$(CudaToolkitDir)include\cooperative_groups" .\cooperative_group
-
+
diff --git a/Samples/0_Introduction/matrixMul_nvrtc/matrixMul_nvrtc_vs2019.vcxproj b/Samples/0_Introduction/matrixMul_nvrtc/matrixMul_nvrtc_vs2019.vcxproj
index cf0c66c8..d0b58366 100644
--- a/Samples/0_Introduction/matrixMul_nvrtc/matrixMul_nvrtc_vs2019.vcxproj
+++ b/Samples/0_Introduction/matrixMul_nvrtc/matrixMul_nvrtc_vs2019.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -109,6 +109,6 @@ xcopy /y /e /s "$(CudaToolkitDir)include\cooperative_groups" .\cooperative_group
-
+
diff --git a/Samples/0_Introduction/matrixMul_nvrtc/matrixMul_nvrtc_vs2022.vcxproj b/Samples/0_Introduction/matrixMul_nvrtc/matrixMul_nvrtc_vs2022.vcxproj
index f6dc2b6f..6fa7922e 100644
--- a/Samples/0_Introduction/matrixMul_nvrtc/matrixMul_nvrtc_vs2022.vcxproj
+++ b/Samples/0_Introduction/matrixMul_nvrtc/matrixMul_nvrtc_vs2022.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -109,6 +109,6 @@ xcopy /y /e /s "$(CudaToolkitDir)include\cooperative_groups" .\cooperative_group
-
+
diff --git a/Samples/0_Introduction/mergeSort/Makefile b/Samples/0_Introduction/mergeSort/Makefile
index ad45af87..815268b1 100644
--- a/Samples/0_Introduction/mergeSort/Makefile
+++ b/Samples/0_Introduction/mergeSort/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
# Gencode arguments
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
endif
ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/mergeSort/NsightEclipse.xml b/Samples/0_Introduction/mergeSort/NsightEclipse.xml
index 0a77b65e..55cab906 100644
--- a/Samples/0_Introduction/mergeSort/NsightEclipse.xml
+++ b/Samples/0_Introduction/mergeSort/NsightEclipse.xml
@@ -4,9 +4,9 @@
mergeSort
cudaMalloc
- cudaFree
cudaDeviceSynchronize
cudaMemcpy
+ cudaFree
whole
@@ -46,6 +46,7 @@
sm80
sm86
sm87
+ sm90
x86_64
diff --git a/Samples/0_Introduction/mergeSort/README.md b/Samples/0_Introduction/mergeSort/README.md
index d7ab7be2..d0853896 100644
--- a/Samples/0_Introduction/mergeSort/README.md
+++ b/Samples/0_Introduction/mergeSort/README.md
@@ -10,7 +10,7 @@ Data-Parallel Algorithms
## Supported SM Architectures
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
## Supported OSes
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
## CUDA APIs involved
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMalloc, cudaFree, cudaDeviceSynchronize, cudaMemcpy
+cudaMalloc, cudaDeviceSynchronize, cudaMemcpy, cudaFree
## Prerequisites
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
## Build and Run
diff --git a/Samples/0_Introduction/mergeSort/mergeSort_vs2017.vcxproj b/Samples/0_Introduction/mergeSort/mergeSort_vs2017.vcxproj
index 9f5e39b3..0ef07013 100644
--- a/Samples/0_Introduction/mergeSort/mergeSort_vs2017.vcxproj
+++ b/Samples/0_Introduction/mergeSort/mergeSort_vs2017.vcxproj
@@ -38,7 +38,7 @@
-
+
@@ -67,7 +67,7 @@
$(OutDir)/mergeSort.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -111,6 +111,6 @@
-
+
diff --git a/Samples/0_Introduction/mergeSort/mergeSort_vs2019.vcxproj b/Samples/0_Introduction/mergeSort/mergeSort_vs2019.vcxproj
index 8639bd2e..5796dda5 100644
--- a/Samples/0_Introduction/mergeSort/mergeSort_vs2019.vcxproj
+++ b/Samples/0_Introduction/mergeSort/mergeSort_vs2019.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -63,7 +63,7 @@
$(OutDir)/mergeSort.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -107,6 +107,6 @@
-
+
diff --git a/Samples/0_Introduction/mergeSort/mergeSort_vs2022.vcxproj b/Samples/0_Introduction/mergeSort/mergeSort_vs2022.vcxproj
index c38e79d6..ed951e9f 100644
--- a/Samples/0_Introduction/mergeSort/mergeSort_vs2022.vcxproj
+++ b/Samples/0_Introduction/mergeSort/mergeSort_vs2022.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -63,7 +63,7 @@
$(OutDir)/mergeSort.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -107,6 +107,6 @@
-
+
diff --git a/Samples/0_Introduction/simpleAWBarrier/Makefile b/Samples/0_Introduction/simpleAWBarrier/Makefile
index cd8dc51d..0fa1e665 100644
--- a/Samples/0_Introduction/simpleAWBarrier/Makefile
+++ b/Samples/0_Introduction/simpleAWBarrier/Makefile
@@ -316,9 +316,9 @@ endif
# Gencode arguments
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 70 72 75 80 86 87
+SMS ?= 70 72 75 80 86 87 90
else
-SMS ?= 70 75 80 86
+SMS ?= 70 75 80 86 90
endif
ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleAWBarrier/NsightEclipse.xml b/Samples/0_Introduction/simpleAWBarrier/NsightEclipse.xml
index e8738f6a..87414f89 100644
--- a/Samples/0_Introduction/simpleAWBarrier/NsightEclipse.xml
+++ b/Samples/0_Introduction/simpleAWBarrier/NsightEclipse.xml
@@ -6,17 +6,17 @@
--std=c++11
- cudaFree
- cudaMallocHost
- cudaOccupancyMaxActiveBlocksPerMultiprocessor
- cudaOccupancyMaxPotentialBlockSize
- cudaDeviceGetAttribute
- cudaFreeHost
- cudaMalloc
cudaStreamCreateWithFlags
- cudaLaunchCooperativeKernel
+ cudaFree
+ cudaDeviceGetAttribute
+ cudaMallocHost
+ cudaFreeHost
cudaStreamSynchronize
+ cudaLaunchCooperativeKernel
+ cudaMalloc
+ cudaOccupancyMaxActiveBlocksPerMultiprocessor
cudaMemcpyAsync
+ cudaOccupancyMaxPotentialBlockSize
whole
@@ -53,6 +53,7 @@
sm80
sm86
sm87
+ sm90
x86_64
diff --git a/Samples/0_Introduction/simpleAWBarrier/README.md b/Samples/0_Introduction/simpleAWBarrier/README.md
index d81ac1f8..064db83a 100644
--- a/Samples/0_Introduction/simpleAWBarrier/README.md
+++ b/Samples/0_Introduction/simpleAWBarrier/README.md
@@ -10,7 +10,7 @@ Arrive Wait Barrier
## Supported SM Architectures
-[SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
## Supported OSes
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l, aarch64
## CUDA APIs involved
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaMallocHost, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaOccupancyMaxPotentialBlockSize, cudaDeviceGetAttribute, cudaFreeHost, cudaMalloc, cudaStreamCreateWithFlags, cudaLaunchCooperativeKernel, cudaStreamSynchronize, cudaMemcpyAsync
+cudaStreamCreateWithFlags, cudaFree, cudaDeviceGetAttribute, cudaMallocHost, cudaFreeHost, cudaStreamSynchronize, cudaLaunchCooperativeKernel, cudaMalloc, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaMemcpyAsync, cudaOccupancyMaxPotentialBlockSize
## Dependencies needed to build/run
[CPP11](../../../README.md#cpp11), [MBCG](../../../README.md#mbcg)
## Prerequisites
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
## Build and Run
diff --git a/Samples/0_Introduction/simpleAWBarrier/simpleAWBarrier_vs2017.vcxproj b/Samples/0_Introduction/simpleAWBarrier/simpleAWBarrier_vs2017.vcxproj
index ea64526a..ed136540 100644
--- a/Samples/0_Introduction/simpleAWBarrier/simpleAWBarrier_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleAWBarrier/simpleAWBarrier_vs2017.vcxproj
@@ -38,7 +38,7 @@
-
+
@@ -67,7 +67,7 @@
$(OutDir)/simpleAWBarrier.exe
- compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -107,6 +107,6 @@
-
+
diff --git a/Samples/0_Introduction/simpleAWBarrier/simpleAWBarrier_vs2019.vcxproj b/Samples/0_Introduction/simpleAWBarrier/simpleAWBarrier_vs2019.vcxproj
index aaf046ed..eeddba29 100644
--- a/Samples/0_Introduction/simpleAWBarrier/simpleAWBarrier_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleAWBarrier/simpleAWBarrier_vs2019.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -63,7 +63,7 @@
$(OutDir)/simpleAWBarrier.exe
- compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -103,6 +103,6 @@
-
+
diff --git a/Samples/0_Introduction/simpleAWBarrier/simpleAWBarrier_vs2022.vcxproj b/Samples/0_Introduction/simpleAWBarrier/simpleAWBarrier_vs2022.vcxproj
index 28637338..85eb24bf 100644
--- a/Samples/0_Introduction/simpleAWBarrier/simpleAWBarrier_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleAWBarrier/simpleAWBarrier_vs2022.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -63,7 +63,7 @@
$(OutDir)/simpleAWBarrier.exe
- compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -103,6 +103,6 @@
-
+
diff --git a/Samples/0_Introduction/simpleAssert/Makefile b/Samples/0_Introduction/simpleAssert/Makefile
index fb73574b..bd790aa6 100644
--- a/Samples/0_Introduction/simpleAssert/Makefile
+++ b/Samples/0_Introduction/simpleAssert/Makefile
@@ -285,9 +285,9 @@ LIBRARIES :=
# Gencode arguments
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
endif
ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleAssert/NsightEclipse.xml b/Samples/0_Introduction/simpleAssert/NsightEclipse.xml
index 7f9e81f5..2ba03ec6 100644
--- a/Samples/0_Introduction/simpleAssert/NsightEclipse.xml
+++ b/Samples/0_Introduction/simpleAssert/NsightEclipse.xml
@@ -41,6 +41,7 @@
sm80
sm86
sm87
+ sm90
x86_64
diff --git a/Samples/0_Introduction/simpleAssert/README.md b/Samples/0_Introduction/simpleAssert/README.md
index e5fbc1b3..05b753a1 100644
--- a/Samples/0_Introduction/simpleAssert/README.md
+++ b/Samples/0_Introduction/simpleAssert/README.md
@@ -10,7 +10,7 @@ Assert
## Supported SM Architectures
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
## Supported OSes
@@ -27,7 +27,7 @@ cudaDeviceSynchronize, cudaGetErrorString
## Prerequisites
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
## Build and Run
diff --git a/Samples/0_Introduction/simpleAssert/simpleAssert_vs2017.vcxproj b/Samples/0_Introduction/simpleAssert/simpleAssert_vs2017.vcxproj
index a033d7b0..731833f9 100644
--- a/Samples/0_Introduction/simpleAssert/simpleAssert_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleAssert/simpleAssert_vs2017.vcxproj
@@ -38,7 +38,7 @@
-
+
@@ -67,7 +67,7 @@
$(OutDir)/simpleAssert.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -107,6 +107,6 @@
-
+
diff --git a/Samples/0_Introduction/simpleAssert/simpleAssert_vs2019.vcxproj b/Samples/0_Introduction/simpleAssert/simpleAssert_vs2019.vcxproj
index a3233782..88e2fa09 100644
--- a/Samples/0_Introduction/simpleAssert/simpleAssert_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleAssert/simpleAssert_vs2019.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -63,7 +63,7 @@
$(OutDir)/simpleAssert.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -103,6 +103,6 @@
-
+
diff --git a/Samples/0_Introduction/simpleAssert/simpleAssert_vs2022.vcxproj b/Samples/0_Introduction/simpleAssert/simpleAssert_vs2022.vcxproj
index de3d8f03..ce935120 100644
--- a/Samples/0_Introduction/simpleAssert/simpleAssert_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleAssert/simpleAssert_vs2022.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -63,7 +63,7 @@
$(OutDir)/simpleAssert.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -103,6 +103,6 @@
-
+
diff --git a/Samples/0_Introduction/simpleAssert_nvrtc/README.md b/Samples/0_Introduction/simpleAssert_nvrtc/README.md
index d0ecd7e0..72c5de11 100644
--- a/Samples/0_Introduction/simpleAssert_nvrtc/README.md
+++ b/Samples/0_Introduction/simpleAssert_nvrtc/README.md
@@ -10,7 +10,7 @@ Assert, Runtime Compilation
## Supported SM Architectures
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
## Supported OSes
@@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64
## CUDA APIs involved
### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
-cuModuleGetFunction, cuCtxSynchronize, cuLaunchKernel
+cuModuleGetFunction, cuLaunchKernel, cuCtxSynchronize
## Dependencies needed to build/run
[NVRTC](../../../README.md#nvrtc)
## Prerequisites
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
## Build and Run
diff --git a/Samples/0_Introduction/simpleAssert_nvrtc/simpleAssert_nvrtc_vs2017.vcxproj b/Samples/0_Introduction/simpleAssert_nvrtc/simpleAssert_nvrtc_vs2017.vcxproj
index 11b8003f..3fc089e5 100644
--- a/Samples/0_Introduction/simpleAssert_nvrtc/simpleAssert_nvrtc_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleAssert_nvrtc/simpleAssert_nvrtc_vs2017.vcxproj
@@ -38,7 +38,7 @@
-
+
@@ -107,6 +107,6 @@
-
+
diff --git a/Samples/0_Introduction/simpleAssert_nvrtc/simpleAssert_nvrtc_vs2019.vcxproj b/Samples/0_Introduction/simpleAssert_nvrtc/simpleAssert_nvrtc_vs2019.vcxproj
index 548b3baf..0714d837 100644
--- a/Samples/0_Introduction/simpleAssert_nvrtc/simpleAssert_nvrtc_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleAssert_nvrtc/simpleAssert_nvrtc_vs2019.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -103,6 +103,6 @@
-
+
diff --git a/Samples/0_Introduction/simpleAssert_nvrtc/simpleAssert_nvrtc_vs2022.vcxproj b/Samples/0_Introduction/simpleAssert_nvrtc/simpleAssert_nvrtc_vs2022.vcxproj
index 59d571ce..fc010fb0 100644
--- a/Samples/0_Introduction/simpleAssert_nvrtc/simpleAssert_nvrtc_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleAssert_nvrtc/simpleAssert_nvrtc_vs2022.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -103,6 +103,6 @@
-
+
diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics/Makefile b/Samples/0_Introduction/simpleAtomicIntrinsics/Makefile
index 7b221fb2..b7222445 100644
--- a/Samples/0_Introduction/simpleAtomicIntrinsics/Makefile
+++ b/Samples/0_Introduction/simpleAtomicIntrinsics/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
# Gencode arguments
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
endif
ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics/NsightEclipse.xml b/Samples/0_Introduction/simpleAtomicIntrinsics/NsightEclipse.xml
index 093f108b..e9252d1c 100644
--- a/Samples/0_Introduction/simpleAtomicIntrinsics/NsightEclipse.xml
+++ b/Samples/0_Introduction/simpleAtomicIntrinsics/NsightEclipse.xml
@@ -3,12 +3,12 @@
simpleAtomicIntrinsics
+ cudaStreamCreateWithFlags
cudaFree
cudaMallocHost
cudaFreeHost
- cudaMalloc
- cudaStreamCreateWithFlags
cudaStreamSynchronize
+ cudaMalloc
cudaMemcpyAsync
@@ -48,6 +48,7 @@
sm80
sm86
sm87
+ sm90
x86_64
diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics/README.md b/Samples/0_Introduction/simpleAtomicIntrinsics/README.md
index 81693b71..0fa52781 100644
--- a/Samples/0_Introduction/simpleAtomicIntrinsics/README.md
+++ b/Samples/0_Introduction/simpleAtomicIntrinsics/README.md
@@ -10,7 +10,7 @@ Atomic Intrinsics
## Supported SM Architectures
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
## Supported OSes
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l, aarch64
## CUDA APIs involved
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaMallocHost, cudaFreeHost, cudaMalloc, cudaStreamCreateWithFlags, cudaStreamSynchronize, cudaMemcpyAsync
+cudaStreamCreateWithFlags, cudaFree, cudaMallocHost, cudaFreeHost, cudaStreamSynchronize, cudaMalloc, cudaMemcpyAsync
## Prerequisites
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
## Build and Run
diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_vs2017.vcxproj b/Samples/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_vs2017.vcxproj
index 87308a5d..d122ae68 100644
--- a/Samples/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_vs2017.vcxproj
@@ -38,7 +38,7 @@
-
+
@@ -67,7 +67,7 @@
$(OutDir)/simpleAtomicIntrinsics.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -108,6 +108,6 @@
-
+
diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_vs2019.vcxproj b/Samples/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_vs2019.vcxproj
index b99f4190..7f05dcc6 100644
--- a/Samples/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_vs2019.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -63,7 +63,7 @@
$(OutDir)/simpleAtomicIntrinsics.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -104,6 +104,6 @@
-
+
diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_vs2022.vcxproj b/Samples/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_vs2022.vcxproj
index bb0bb8df..7dd8d89e 100644
--- a/Samples/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_vs2022.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -63,7 +63,7 @@
$(OutDir)/simpleAtomicIntrinsics.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -104,6 +104,6 @@
-
+
diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/README.md b/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/README.md
index 0d1700a7..a53e822b 100644
--- a/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/README.md
+++ b/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/README.md
@@ -10,7 +10,7 @@ Atomic Intrinsics, Runtime Compilation
## Supported SM Architectures
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
## Supported OSes
@@ -23,7 +23,7 @@ x86_64, ppc64le, aarch64
## CUDA APIs involved
### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
-cuModuleGetFunction, cuMemAlloc, cuLaunchKernel, cuCtxSynchronize, cuMemFree, cuMemcpyDtoH, cuMemcpyHtoD
+cuMemcpyDtoH, cuLaunchKernel, cuMemcpyHtoD, cuCtxSynchronize, cuMemAlloc, cuMemFree, cuModuleGetFunction
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
cudaBlockSize, cudaGridSize
@@ -33,7 +33,7 @@ cudaBlockSize, cudaGridSize
## Prerequisites
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
## Build and Run
diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_nvrtc_vs2017.vcxproj b/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_nvrtc_vs2017.vcxproj
index c51f9939..9db171b1 100644
--- a/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_nvrtc_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_nvrtc_vs2017.vcxproj
@@ -38,7 +38,7 @@
-
+
@@ -108,6 +108,6 @@
-
+
diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_nvrtc_vs2019.vcxproj b/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_nvrtc_vs2019.vcxproj
index 75b918e7..b43cec91 100644
--- a/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_nvrtc_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_nvrtc_vs2019.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -104,6 +104,6 @@
-
+
diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_nvrtc_vs2022.vcxproj b/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_nvrtc_vs2022.vcxproj
index f7cb9e38..bd705f44 100644
--- a/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_nvrtc_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_nvrtc_vs2022.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -104,6 +104,6 @@
-
+
diff --git a/Samples/0_Introduction/simpleAttributes/Makefile b/Samples/0_Introduction/simpleAttributes/Makefile
index 00e9c4f0..e685dd69 100644
--- a/Samples/0_Introduction/simpleAttributes/Makefile
+++ b/Samples/0_Introduction/simpleAttributes/Makefile
@@ -285,9 +285,9 @@ LIBRARIES :=
# Gencode arguments
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
endif
ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleAttributes/NsightEclipse.xml b/Samples/0_Introduction/simpleAttributes/NsightEclipse.xml
index 5141efa1..fcad8235 100644
--- a/Samples/0_Introduction/simpleAttributes/NsightEclipse.xml
+++ b/Samples/0_Introduction/simpleAttributes/NsightEclipse.xml
@@ -3,15 +3,15 @@
simpleAttributes
- cudaDeviceSetLimit
cudaFree
cudaMallocHost
- cudaStreamCreate
cudaFreeHost
- cudaMalloc
- cudaStreamSetAttribute
- cudaMemcpyAsync
cudaStreamSynchronize
+ cudaStreamSetAttribute
+ cudaDeviceSetLimit
+ cudaMalloc
+ cudaMemcpyAsync
+ cudaStreamCreate
cudaGetDeviceProperties
@@ -49,6 +49,7 @@
sm80
sm86
sm87
+ sm90
x86_64
diff --git a/Samples/0_Introduction/simpleAttributes/README.md b/Samples/0_Introduction/simpleAttributes/README.md
index e5b6fa87..5dc1787b 100644
--- a/Samples/0_Introduction/simpleAttributes/README.md
+++ b/Samples/0_Introduction/simpleAttributes/README.md
@@ -10,7 +10,7 @@ Attributes usage on stream
## Supported SM Architectures
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
## Supported OSes
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
## CUDA APIs involved
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaDeviceSetLimit, cudaFree, cudaMallocHost, cudaStreamCreate, cudaFreeHost, cudaMalloc, cudaStreamSetAttribute, cudaMemcpyAsync, cudaStreamSynchronize, cudaGetDeviceProperties
+cudaFree, cudaMallocHost, cudaFreeHost, cudaStreamSynchronize, cudaStreamSetAttribute, cudaDeviceSetLimit, cudaMalloc, cudaMemcpyAsync, cudaStreamCreate, cudaGetDeviceProperties
## Prerequisites
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
## Build and Run
diff --git a/Samples/0_Introduction/simpleAttributes/simpleAttributes_vs2017.vcxproj b/Samples/0_Introduction/simpleAttributes/simpleAttributes_vs2017.vcxproj
index 2752b266..a446d3a9 100644
--- a/Samples/0_Introduction/simpleAttributes/simpleAttributes_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleAttributes/simpleAttributes_vs2017.vcxproj
@@ -38,7 +38,7 @@
-
+
@@ -67,7 +67,7 @@
$(OutDir)/simpleAttributes.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -107,6 +107,6 @@
-
+
diff --git a/Samples/0_Introduction/simpleAttributes/simpleAttributes_vs2019.vcxproj b/Samples/0_Introduction/simpleAttributes/simpleAttributes_vs2019.vcxproj
index 620fdca3..e49167d9 100644
--- a/Samples/0_Introduction/simpleAttributes/simpleAttributes_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleAttributes/simpleAttributes_vs2019.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -63,7 +63,7 @@
$(OutDir)/simpleAttributes.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -103,6 +103,6 @@
-
+
diff --git a/Samples/0_Introduction/simpleAttributes/simpleAttributes_vs2022.vcxproj b/Samples/0_Introduction/simpleAttributes/simpleAttributes_vs2022.vcxproj
index 9ab8f931..1eb61252 100644
--- a/Samples/0_Introduction/simpleAttributes/simpleAttributes_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleAttributes/simpleAttributes_vs2022.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -63,7 +63,7 @@
$(OutDir)/simpleAttributes.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -103,6 +103,6 @@
-
+
diff --git a/Samples/0_Introduction/simpleCUDA2GL/Makefile b/Samples/0_Introduction/simpleCUDA2GL/Makefile
index 39042604..80e3250f 100644
--- a/Samples/0_Introduction/simpleCUDA2GL/Makefile
+++ b/Samples/0_Introduction/simpleCUDA2GL/Makefile
@@ -311,9 +311,9 @@ endif
# Gencode arguments
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
endif
ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleCUDA2GL/NsightEclipse.xml b/Samples/0_Introduction/simpleCUDA2GL/NsightEclipse.xml
index 55b9400c..1f40f86b 100644
--- a/Samples/0_Introduction/simpleCUDA2GL/NsightEclipse.xml
+++ b/Samples/0_Introduction/simpleCUDA2GL/NsightEclipse.xml
@@ -3,19 +3,19 @@
simpleCUDA2GL
- cudaFree
- cudaGraphicsMapResources
- cudaGraphicsUnregisterResource
- cudaMemcpyToArray
- cudaGraphicsGLRegisterBuffer
cudaHostAlloc
- cudaGraphicsResourceGetMappedPointer
- cudaProcess
- cudaDeviceSynchronize
- cudaMalloc
- cudaGraphicsSubResourceGetMappedArray
- cudaGraphicsGLRegisterImage
cudaGraphicsUnmapResources
+ cudaMalloc
+ cudaFree
+ cudaGraphicsResourceGetMappedPointer
+ cudaGraphicsMapResources
+ cudaMemcpyToArray
+ cudaDeviceSynchronize
+ cudaProcess
+ cudaGraphicsUnregisterResource
+ cudaGraphicsSubResourceGetMappedArray
+ cudaGraphicsGLRegisterBuffer
+ cudaGraphicsGLRegisterImage
whole
@@ -79,6 +79,7 @@
sm80
sm86
sm87
+ sm90
x86_64
diff --git a/Samples/0_Introduction/simpleCUDA2GL/README.md b/Samples/0_Introduction/simpleCUDA2GL/README.md
index c5f7bb6d..7c46fb7e 100644
--- a/Samples/0_Introduction/simpleCUDA2GL/README.md
+++ b/Samples/0_Introduction/simpleCUDA2GL/README.md
@@ -10,7 +10,7 @@ Graphics Interop, Image Processing, Performance Strategies
## Supported SM Architectures
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
## Supported OSes
@@ -23,14 +23,14 @@ x86_64
## CUDA APIs involved
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaGraphicsMapResources, cudaGraphicsUnregisterResource, cudaMemcpyToArray, cudaGraphicsGLRegisterBuffer, cudaHostAlloc, cudaGraphicsResourceGetMappedPointer, cudaProcess, cudaDeviceSynchronize, cudaMalloc, cudaGraphicsSubResourceGetMappedArray, cudaGraphicsGLRegisterImage, cudaGraphicsUnmapResources
+cudaHostAlloc, cudaGraphicsUnmapResources, cudaMalloc, cudaFree, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaMemcpyToArray, cudaDeviceSynchronize, cudaProcess, cudaGraphicsUnregisterResource, cudaGraphicsSubResourceGetMappedArray, cudaGraphicsGLRegisterBuffer, cudaGraphicsGLRegisterImage
## Dependencies needed to build/run
[X11](../../../README.md#x11), [GL](../../../README.md#gl)
## Prerequisites
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
## Build and Run
diff --git a/Samples/0_Introduction/simpleCUDA2GL/findgllib.mk b/Samples/0_Introduction/simpleCUDA2GL/findgllib.mk
index f0a5c551..998fcf0f 100644
--- a/Samples/0_Introduction/simpleCUDA2GL/findgllib.mk
+++ b/Samples/0_Introduction/simpleCUDA2GL/findgllib.mk
@@ -53,11 +53,12 @@ endif
ifeq ("$(TARGET_OS)","linux")
# $(info) >> findgllib.mk -> LINUX path <<<)
# Each set of Linux Distros have different paths for where to find their OpenGL libraries reside
- UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?)
- FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?)
- RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
- CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?)
+ UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?)
+ FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?)
+ RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
+ CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?)
SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+ KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?)
ifeq ("$(UBUNTU)","0")
ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux")
DFLT_PATH ?= /usr/lib
endif
endif
+
ifeq ("$(SUSE)","0")
GLPATH ?= /usr/X11R6/lib64
GLLINK ?= -L/usr/X11R6/lib64
DFLT_PATH ?= /usr/lib64
- endif
- ifeq ("$(FEDORA)","0")
+ else
GLPATH ?= /usr/lib64/nvidia
GLLINK ?= -L/usr/lib64/nvidia
DFLT_PATH ?= /usr/lib64
endif
- ifeq ("$(RHEL)","0")
- GLPATH ?= /usr/lib64/nvidia
- GLLINK ?= -L/usr/lib64/nvidia
- DFLT_PATH ?= /usr/lib64
- endif
- ifeq ("$(CENTOS)","0")
- GLPATH ?= /usr/lib64/nvidia
- GLLINK ?= -L/usr/lib64/nvidia
- DFLT_PATH ?= /usr/lib64
- endif
-
+
# find libGL, libGLU
GLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so -print 2>/dev/null)
GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null)
diff --git a/Samples/0_Introduction/simpleCUDA2GL/simpleCUDA2GL_vs2017.vcxproj b/Samples/0_Introduction/simpleCUDA2GL/simpleCUDA2GL_vs2017.vcxproj
index b1c84ae4..65865117 100644
--- a/Samples/0_Introduction/simpleCUDA2GL/simpleCUDA2GL_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleCUDA2GL/simpleCUDA2GL_vs2017.vcxproj
@@ -38,7 +38,7 @@
-
+
@@ -67,7 +67,7 @@
$(OutDir)/simpleCUDA2GL.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -118,6 +118,6 @@
-
+
diff --git a/Samples/0_Introduction/simpleCUDA2GL/simpleCUDA2GL_vs2019.vcxproj b/Samples/0_Introduction/simpleCUDA2GL/simpleCUDA2GL_vs2019.vcxproj
index 955b060a..19ad1de2 100644
--- a/Samples/0_Introduction/simpleCUDA2GL/simpleCUDA2GL_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleCUDA2GL/simpleCUDA2GL_vs2019.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -63,7 +63,7 @@
$(OutDir)/simpleCUDA2GL.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -114,6 +114,6 @@
-
+
diff --git a/Samples/0_Introduction/simpleCUDA2GL/simpleCUDA2GL_vs2022.vcxproj b/Samples/0_Introduction/simpleCUDA2GL/simpleCUDA2GL_vs2022.vcxproj
index 46e97f62..f2b071ec 100644
--- a/Samples/0_Introduction/simpleCUDA2GL/simpleCUDA2GL_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleCUDA2GL/simpleCUDA2GL_vs2022.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -63,7 +63,7 @@
$(OutDir)/simpleCUDA2GL.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -114,6 +114,6 @@
-
+
diff --git a/Samples/0_Introduction/simpleCallback/Makefile b/Samples/0_Introduction/simpleCallback/Makefile
index fade686a..ff334a83 100644
--- a/Samples/0_Introduction/simpleCallback/Makefile
+++ b/Samples/0_Introduction/simpleCallback/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
# Gencode arguments
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
endif
ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleCallback/NsightEclipse.xml b/Samples/0_Introduction/simpleCallback/NsightEclipse.xml
index 3ee3b90d..931c7c65 100644
--- a/Samples/0_Introduction/simpleCallback/NsightEclipse.xml
+++ b/Samples/0_Introduction/simpleCallback/NsightEclipse.xml
@@ -3,16 +3,16 @@
simpleCallback
+ cudaHostAlloc
cudaStreamDestroy
cudaFree
- cudaStreamCreate
- cudaHostAlloc
+ cudaSetDevice
cudaGetDeviceCount
cudaFreeHost
- cudaMalloc
- cudaSetDevice
- cudaMemcpyAsync
cudaStreamAddCallback
+ cudaMalloc
+ cudaMemcpyAsync
+ cudaStreamCreate
cudaGetDeviceProperties
@@ -53,6 +53,7 @@
sm80
sm86
sm87
+ sm90
x86_64
diff --git a/Samples/0_Introduction/simpleCallback/README.md b/Samples/0_Introduction/simpleCallback/README.md
index 24c76c25..40368567 100644
--- a/Samples/0_Introduction/simpleCallback/README.md
+++ b/Samples/0_Introduction/simpleCallback/README.md
@@ -10,7 +10,7 @@ CUDA Streams, Callback Functions, Multithreading
## Supported SM Architectures
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
## Supported OSes
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
## CUDA APIs involved
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaStreamDestroy, cudaFree, cudaStreamCreate, cudaHostAlloc, cudaGetDeviceCount, cudaFreeHost, cudaMalloc, cudaSetDevice, cudaMemcpyAsync, cudaStreamAddCallback, cudaGetDeviceProperties
+cudaHostAlloc, cudaStreamDestroy, cudaFree, cudaSetDevice, cudaGetDeviceCount, cudaFreeHost, cudaStreamAddCallback, cudaMalloc, cudaMemcpyAsync, cudaStreamCreate, cudaGetDeviceProperties
## Prerequisites
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
## Build and Run
diff --git a/Samples/0_Introduction/simpleCallback/simpleCallback_vs2017.vcxproj b/Samples/0_Introduction/simpleCallback/simpleCallback_vs2017.vcxproj
index 677ea6b2..ad8bf900 100644
--- a/Samples/0_Introduction/simpleCallback/simpleCallback_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleCallback/simpleCallback_vs2017.vcxproj
@@ -38,7 +38,7 @@
-
+
@@ -67,7 +67,7 @@
$(OutDir)/simpleCallback.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -108,6 +108,6 @@
-
+
diff --git a/Samples/0_Introduction/simpleCallback/simpleCallback_vs2019.vcxproj b/Samples/0_Introduction/simpleCallback/simpleCallback_vs2019.vcxproj
index 0dcbadea..b200ba6f 100644
--- a/Samples/0_Introduction/simpleCallback/simpleCallback_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleCallback/simpleCallback_vs2019.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -63,7 +63,7 @@
$(OutDir)/simpleCallback.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -104,6 +104,6 @@
-
+
diff --git a/Samples/0_Introduction/simpleCallback/simpleCallback_vs2022.vcxproj b/Samples/0_Introduction/simpleCallback/simpleCallback_vs2022.vcxproj
index 6f2e491b..196f5794 100644
--- a/Samples/0_Introduction/simpleCallback/simpleCallback_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleCallback/simpleCallback_vs2022.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -63,7 +63,7 @@
$(OutDir)/simpleCallback.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -104,6 +104,6 @@
-
+
diff --git a/Samples/0_Introduction/simpleCooperativeGroups/Makefile b/Samples/0_Introduction/simpleCooperativeGroups/Makefile
index c45b7332..22efbff3 100644
--- a/Samples/0_Introduction/simpleCooperativeGroups/Makefile
+++ b/Samples/0_Introduction/simpleCooperativeGroups/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
# Gencode arguments
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
endif
ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleCooperativeGroups/NsightEclipse.xml b/Samples/0_Introduction/simpleCooperativeGroups/NsightEclipse.xml
index 54d9c4d3..939f68ad 100644
--- a/Samples/0_Introduction/simpleCooperativeGroups/NsightEclipse.xml
+++ b/Samples/0_Introduction/simpleCooperativeGroups/NsightEclipse.xml
@@ -44,6 +44,7 @@
sm80
sm86
sm87
+ sm90
x86_64
diff --git a/Samples/0_Introduction/simpleCooperativeGroups/README.md b/Samples/0_Introduction/simpleCooperativeGroups/README.md
index 7e80f6bc..ab3e11cc 100644
--- a/Samples/0_Introduction/simpleCooperativeGroups/README.md
+++ b/Samples/0_Introduction/simpleCooperativeGroups/README.md
@@ -10,7 +10,7 @@ Cooperative Groups
## Supported SM Architectures
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
## Supported OSes
@@ -27,7 +27,7 @@ cudaDeviceSynchronize, cudaGetErrorString
## Prerequisites
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
## Build and Run
diff --git a/Samples/0_Introduction/simpleCooperativeGroups/simpleCooperativeGroups_vs2017.vcxproj b/Samples/0_Introduction/simpleCooperativeGroups/simpleCooperativeGroups_vs2017.vcxproj
index 4a7bac2a..061538d8 100644
--- a/Samples/0_Introduction/simpleCooperativeGroups/simpleCooperativeGroups_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleCooperativeGroups/simpleCooperativeGroups_vs2017.vcxproj
@@ -38,7 +38,7 @@
-
+
@@ -67,7 +67,7 @@
$(OutDir)/simpleCooperativeGroups.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -107,6 +107,6 @@
-
+
diff --git a/Samples/0_Introduction/simpleCooperativeGroups/simpleCooperativeGroups_vs2019.vcxproj b/Samples/0_Introduction/simpleCooperativeGroups/simpleCooperativeGroups_vs2019.vcxproj
index 09d33159..bf17882a 100644
--- a/Samples/0_Introduction/simpleCooperativeGroups/simpleCooperativeGroups_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleCooperativeGroups/simpleCooperativeGroups_vs2019.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -63,7 +63,7 @@
$(OutDir)/simpleCooperativeGroups.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -103,6 +103,6 @@
-
+
diff --git a/Samples/0_Introduction/simpleCooperativeGroups/simpleCooperativeGroups_vs2022.vcxproj b/Samples/0_Introduction/simpleCooperativeGroups/simpleCooperativeGroups_vs2022.vcxproj
index 12759203..649221c2 100644
--- a/Samples/0_Introduction/simpleCooperativeGroups/simpleCooperativeGroups_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleCooperativeGroups/simpleCooperativeGroups_vs2022.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -63,7 +63,7 @@
$(OutDir)/simpleCooperativeGroups.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -103,6 +103,6 @@
-
+
diff --git a/Samples/0_Introduction/simpleCubemapTexture/Makefile b/Samples/0_Introduction/simpleCubemapTexture/Makefile
index fdff3980..4c1fed17 100644
--- a/Samples/0_Introduction/simpleCubemapTexture/Makefile
+++ b/Samples/0_Introduction/simpleCubemapTexture/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
# Gencode arguments
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
endif
ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleCubemapTexture/NsightEclipse.xml b/Samples/0_Introduction/simpleCubemapTexture/NsightEclipse.xml
index 625ed7a5..1bf6b010 100644
--- a/Samples/0_Introduction/simpleCubemapTexture/NsightEclipse.xml
+++ b/Samples/0_Introduction/simpleCubemapTexture/NsightEclipse.xml
@@ -3,17 +3,17 @@
simpleCubemapTexture
- cudaFree
+ cudaMemcpy
+ cudaCreateChannelDesc
cudaFreeArray
+ cudaFree
+ cudaPitchedPtr
+ cudaPos
+ cudaDestroyTextureObject
cudaExtent
cudaDeviceSynchronize
- cudaDestroyTextureObject
- cudaPitchedPtr
cudaCreateTextureObject
cudaMalloc
- cudaCreateChannelDesc
- cudaPos
- cudaMemcpy
cudaGetDeviceProperties
@@ -52,6 +52,7 @@
sm80
sm86
sm87
+ sm90
x86_64
diff --git a/Samples/0_Introduction/simpleCubemapTexture/README.md b/Samples/0_Introduction/simpleCubemapTexture/README.md
index 68f06947..44c3896f 100644
--- a/Samples/0_Introduction/simpleCubemapTexture/README.md
+++ b/Samples/0_Introduction/simpleCubemapTexture/README.md
@@ -10,7 +10,7 @@ Texture, Volume Processing
## Supported SM Architectures
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
## Supported OSes
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
## CUDA APIs involved
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaFreeArray, cudaExtent, cudaDeviceSynchronize, cudaDestroyTextureObject, cudaPitchedPtr, cudaCreateTextureObject, cudaMalloc, cudaCreateChannelDesc, cudaPos, cudaMemcpy, cudaGetDeviceProperties
+cudaMemcpy, cudaCreateChannelDesc, cudaFreeArray, cudaFree, cudaPitchedPtr, cudaPos, cudaDestroyTextureObject, cudaExtent, cudaDeviceSynchronize, cudaCreateTextureObject, cudaMalloc, cudaGetDeviceProperties
## Prerequisites
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
## Build and Run
diff --git a/Samples/0_Introduction/simpleCubemapTexture/simpleCubemapTexture_vs2017.vcxproj b/Samples/0_Introduction/simpleCubemapTexture/simpleCubemapTexture_vs2017.vcxproj
index 26ae9423..307c5282 100644
--- a/Samples/0_Introduction/simpleCubemapTexture/simpleCubemapTexture_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleCubemapTexture/simpleCubemapTexture_vs2017.vcxproj
@@ -38,7 +38,7 @@
-
+
@@ -67,7 +67,7 @@
$(OutDir)/simpleCubemapTexture.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -107,6 +107,6 @@
-
+
diff --git a/Samples/0_Introduction/simpleCubemapTexture/simpleCubemapTexture_vs2019.vcxproj b/Samples/0_Introduction/simpleCubemapTexture/simpleCubemapTexture_vs2019.vcxproj
index bcdaec2f..709f0362 100644
--- a/Samples/0_Introduction/simpleCubemapTexture/simpleCubemapTexture_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleCubemapTexture/simpleCubemapTexture_vs2019.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -63,7 +63,7 @@
$(OutDir)/simpleCubemapTexture.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -103,6 +103,6 @@
-
+
diff --git a/Samples/0_Introduction/simpleCubemapTexture/simpleCubemapTexture_vs2022.vcxproj b/Samples/0_Introduction/simpleCubemapTexture/simpleCubemapTexture_vs2022.vcxproj
index 0dd40bfd..3c332afb 100644
--- a/Samples/0_Introduction/simpleCubemapTexture/simpleCubemapTexture_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleCubemapTexture/simpleCubemapTexture_vs2022.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -63,7 +63,7 @@
$(OutDir)/simpleCubemapTexture.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -103,6 +103,6 @@
-
+
diff --git a/Samples/0_Introduction/simpleDrvRuntime/Makefile b/Samples/0_Introduction/simpleDrvRuntime/Makefile
index 3cbc5811..46593a89 100644
--- a/Samples/0_Introduction/simpleDrvRuntime/Makefile
+++ b/Samples/0_Introduction/simpleDrvRuntime/Makefile
@@ -283,9 +283,9 @@ FATBIN_FILE := vectorAdd_kernel${TARGET_SIZE}.fatbin
# Gencode arguments
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
endif
ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleDrvRuntime/README.md b/Samples/0_Introduction/simpleDrvRuntime/README.md
index 74ea4ad9..158157e8 100644
--- a/Samples/0_Introduction/simpleDrvRuntime/README.md
+++ b/Samples/0_Introduction/simpleDrvRuntime/README.md
@@ -10,7 +10,7 @@ CUDA Driver API, CUDA Runtime API, Vector Addition
## Supported SM Architectures
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
## Supported OSes
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
## CUDA APIs involved
### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
-cuCtxDestroy, cuModuleLoadData, cuCtxCreate, cuLaunchKernel, cuModuleUnload, cuInit, cuModuleGetFunction
+cuLaunchKernel, cuModuleLoadData, cuCtxDestroy, cuModuleUnload, cuModuleGetFunction, cuCtxCreate, cuInit
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaMallocHost, cudaFreeHost, cudaMalloc, cudaStreamCreateWithFlags, cudaStreamSynchronize, cudaMemcpyAsync
+cudaStreamCreateWithFlags, cudaFree, cudaMallocHost, cudaFreeHost, cudaStreamSynchronize, cudaMalloc, cudaMemcpyAsync
## Prerequisites
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
## Build and Run
diff --git a/Samples/0_Introduction/simpleDrvRuntime/simpleDrvRuntime_vs2017.vcxproj b/Samples/0_Introduction/simpleDrvRuntime/simpleDrvRuntime_vs2017.vcxproj
index 92e427cd..019fc0cc 100644
--- a/Samples/0_Introduction/simpleDrvRuntime/simpleDrvRuntime_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleDrvRuntime/simpleDrvRuntime_vs2017.vcxproj
@@ -38,7 +38,7 @@
-
+
@@ -67,7 +67,7 @@
$(OutDir)/simpleDrvRuntime.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -111,6 +111,6 @@
-
+
diff --git a/Samples/0_Introduction/simpleDrvRuntime/simpleDrvRuntime_vs2019.vcxproj b/Samples/0_Introduction/simpleDrvRuntime/simpleDrvRuntime_vs2019.vcxproj
index 64f8fab9..727c658d 100644
--- a/Samples/0_Introduction/simpleDrvRuntime/simpleDrvRuntime_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleDrvRuntime/simpleDrvRuntime_vs2019.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -63,7 +63,7 @@
$(OutDir)/simpleDrvRuntime.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -107,6 +107,6 @@
-
+
diff --git a/Samples/0_Introduction/simpleDrvRuntime/simpleDrvRuntime_vs2022.vcxproj b/Samples/0_Introduction/simpleDrvRuntime/simpleDrvRuntime_vs2022.vcxproj
index 7f3d2b01..93b2ffad 100644
--- a/Samples/0_Introduction/simpleDrvRuntime/simpleDrvRuntime_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleDrvRuntime/simpleDrvRuntime_vs2022.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -63,7 +63,7 @@
$(OutDir)/simpleDrvRuntime.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -107,6 +107,6 @@
-
+
diff --git a/Samples/0_Introduction/simpleHyperQ/Makefile b/Samples/0_Introduction/simpleHyperQ/Makefile
index 48018511..16140688 100644
--- a/Samples/0_Introduction/simpleHyperQ/Makefile
+++ b/Samples/0_Introduction/simpleHyperQ/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
# Gencode arguments
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
endif
ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleHyperQ/NsightEclipse.xml b/Samples/0_Introduction/simpleHyperQ/NsightEclipse.xml
index 8777270a..1a503845 100644
--- a/Samples/0_Introduction/simpleHyperQ/NsightEclipse.xml
+++ b/Samples/0_Introduction/simpleHyperQ/NsightEclipse.xml
@@ -3,20 +3,20 @@
simpleHyperQ
- cudaStreamDestroy
- cudaFree
- cudaEventRecord
- cudaMallocHost
- cudaStreamCreate
- cudaEventCreate
- cudaEventElapsedTime
- cudaEventSynchronize
- cudaFreeHost
- cudaMalloc
- cudaEventDestroy
cudaMemcpy
- cudaGetDeviceProperties
+ cudaStreamDestroy
+ cudaMalloc
+ cudaFree
+ cudaMallocHost
+ cudaEventSynchronize
+ cudaEventRecord
+ cudaFreeHost
cudaGetDevice
+ cudaEventDestroy
+ cudaEventElapsedTime
+ cudaStreamCreate
+ cudaGetDeviceProperties
+ cudaEventCreate
whole
@@ -62,6 +62,7 @@
sm80
sm86
sm87
+ sm90
x86_64
diff --git a/Samples/0_Introduction/simpleHyperQ/README.md b/Samples/0_Introduction/simpleHyperQ/README.md
index 8527317e..467bc4b3 100644
--- a/Samples/0_Introduction/simpleHyperQ/README.md
+++ b/Samples/0_Introduction/simpleHyperQ/README.md
@@ -10,7 +10,7 @@ CUDA Systems Integration, Performance Strategies
## Supported SM Architectures
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
## Supported OSes
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
## CUDA APIs involved
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaStreamDestroy, cudaFree, cudaEventRecord, cudaMallocHost, cudaStreamCreate, cudaEventCreate, cudaEventElapsedTime, cudaEventSynchronize, cudaFreeHost, cudaMalloc, cudaEventDestroy, cudaMemcpy, cudaGetDeviceProperties, cudaGetDevice
+cudaMemcpy, cudaStreamDestroy, cudaMalloc, cudaFree, cudaMallocHost, cudaEventSynchronize, cudaEventRecord, cudaFreeHost, cudaGetDevice, cudaEventDestroy, cudaEventElapsedTime, cudaStreamCreate, cudaGetDeviceProperties, cudaEventCreate
## Prerequisites
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
## Build and Run
diff --git a/Samples/0_Introduction/simpleHyperQ/simpleHyperQ_vs2017.vcxproj b/Samples/0_Introduction/simpleHyperQ/simpleHyperQ_vs2017.vcxproj
index 1e4bedc3..d2bbd16a 100644
--- a/Samples/0_Introduction/simpleHyperQ/simpleHyperQ_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleHyperQ/simpleHyperQ_vs2017.vcxproj
@@ -38,7 +38,7 @@
-
+
@@ -67,7 +67,7 @@
$(OutDir)/simpleHyperQ.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -107,6 +107,6 @@
-
+
diff --git a/Samples/0_Introduction/simpleHyperQ/simpleHyperQ_vs2019.vcxproj b/Samples/0_Introduction/simpleHyperQ/simpleHyperQ_vs2019.vcxproj
index b9180fa0..3a6cc72c 100644
--- a/Samples/0_Introduction/simpleHyperQ/simpleHyperQ_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleHyperQ/simpleHyperQ_vs2019.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -63,7 +63,7 @@
$(OutDir)/simpleHyperQ.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -103,6 +103,6 @@
-
+
diff --git a/Samples/0_Introduction/simpleHyperQ/simpleHyperQ_vs2022.vcxproj b/Samples/0_Introduction/simpleHyperQ/simpleHyperQ_vs2022.vcxproj
index b641b971..c15d7eec 100644
--- a/Samples/0_Introduction/simpleHyperQ/simpleHyperQ_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleHyperQ/simpleHyperQ_vs2022.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -63,7 +63,7 @@
$(OutDir)/simpleHyperQ.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -103,6 +103,6 @@
-
+
diff --git a/Samples/0_Introduction/simpleIPC/Makefile b/Samples/0_Introduction/simpleIPC/Makefile
index 31945c1e..914f1ab6 100644
--- a/Samples/0_Introduction/simpleIPC/Makefile
+++ b/Samples/0_Introduction/simpleIPC/Makefile
@@ -303,9 +303,9 @@ LIBRARIES :=
# Gencode arguments
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
endif
ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleIPC/NsightEclipse.xml b/Samples/0_Introduction/simpleIPC/NsightEclipse.xml
index 23e3c5a7..d25608df 100644
--- a/Samples/0_Introduction/simpleIPC/NsightEclipse.xml
+++ b/Samples/0_Introduction/simpleIPC/NsightEclipse.xml
@@ -3,29 +3,29 @@
simpleIPC
- cudaDeviceEnablePeerAccess
- cudaIpcOpenEventHandle
- cudaOccupancyMaxActiveBlocksPerMultiprocessor
- cudaStreamCreateWithFlags
- cudaDeviceCanAccessPeer
- cudaMemcpyAsync
- cudaStreamDestroy
- cudaEventCreate
- cudaMalloc
- cudaEventDestroy
cudaSetDevice
- cudaIpcOpenMemHandle
- cudaGetDeviceProperties
- cudaGetDeviceCount
- cudaIpcGetEventHandle
- cudaGetLastError
- cudaStreamSynchronize
- cudaStreamWaitEvent
- cudaFree
cudaIpcCloseMemHandle
+ cudaEventDestroy
+ cudaGetDeviceCount
+ cudaMemcpyAsync
+ cudaDeviceCanAccessPeer
+ cudaStreamCreateWithFlags
+ cudaStreamDestroy
+ cudaGetLastError
+ cudaIpcOpenEventHandle
+ cudaIpcOpenMemHandle
+ cudaIpcGetEventHandle
+ cudaStreamWaitEvent
+ cudaEventCreate
+ cudaFree
+ cudaEventSynchronize
cudaEventRecord
cudaIpcGetMemHandle
- cudaEventSynchronize
+ cudaStreamSynchronize
+ cudaDeviceEnablePeerAccess
+ cudaMalloc
+ cudaOccupancyMaxActiveBlocksPerMultiprocessor
+ cudaGetDeviceProperties
whole
@@ -71,6 +71,7 @@
sm80
sm86
sm87
+ sm90
../../../Common/helper_multiprocess.cpp
../../../Common/helper_multiprocess.h
diff --git a/Samples/0_Introduction/simpleIPC/README.md b/Samples/0_Introduction/simpleIPC/README.md
index 1594c529..a9d3336b 100644
--- a/Samples/0_Introduction/simpleIPC/README.md
+++ b/Samples/0_Introduction/simpleIPC/README.md
@@ -10,7 +10,7 @@ CUDA Systems Integration, Peer to Peer, InterProcess Communication
## Supported SM Architectures
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
## Supported OSes
@@ -23,14 +23,14 @@ x86_64, ppc64le
## CUDA APIs involved
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaDeviceEnablePeerAccess, cudaIpcOpenEventHandle, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaStreamCreateWithFlags, cudaDeviceCanAccessPeer, cudaMemcpyAsync, cudaStreamDestroy, cudaEventCreate, cudaMalloc, cudaEventDestroy, cudaSetDevice, cudaIpcOpenMemHandle, cudaGetDeviceProperties, cudaGetDeviceCount, cudaIpcGetEventHandle, cudaGetLastError, cudaStreamSynchronize, cudaStreamWaitEvent, cudaFree, cudaIpcCloseMemHandle, cudaEventRecord, cudaIpcGetMemHandle, cudaEventSynchronize
+cudaSetDevice, cudaIpcCloseMemHandle, cudaEventDestroy, cudaGetDeviceCount, cudaMemcpyAsync, cudaDeviceCanAccessPeer, cudaStreamCreateWithFlags, cudaStreamDestroy, cudaGetLastError, cudaIpcOpenEventHandle, cudaIpcOpenMemHandle, cudaIpcGetEventHandle, cudaStreamWaitEvent, cudaEventCreate, cudaFree, cudaEventSynchronize, cudaEventRecord, cudaIpcGetMemHandle, cudaStreamSynchronize, cudaDeviceEnablePeerAccess, cudaMalloc, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaGetDeviceProperties
## Dependencies needed to build/run
[IPC](../../../README.md#ipc)
## Prerequisites
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
## Build and Run
diff --git a/Samples/0_Introduction/simpleIPC/simpleIPC_vs2017.vcxproj b/Samples/0_Introduction/simpleIPC/simpleIPC_vs2017.vcxproj
index 99d342ac..03771430 100644
--- a/Samples/0_Introduction/simpleIPC/simpleIPC_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleIPC/simpleIPC_vs2017.vcxproj
@@ -38,7 +38,7 @@
-
+
@@ -67,7 +67,7 @@
$(OutDir)/simpleIPC.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -108,6 +108,6 @@
-
+
diff --git a/Samples/0_Introduction/simpleIPC/simpleIPC_vs2019.vcxproj b/Samples/0_Introduction/simpleIPC/simpleIPC_vs2019.vcxproj
index 1e507919..4d8096a2 100644
--- a/Samples/0_Introduction/simpleIPC/simpleIPC_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleIPC/simpleIPC_vs2019.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -63,7 +63,7 @@
$(OutDir)/simpleIPC.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -104,6 +104,6 @@
-
+
diff --git a/Samples/0_Introduction/simpleIPC/simpleIPC_vs2022.vcxproj b/Samples/0_Introduction/simpleIPC/simpleIPC_vs2022.vcxproj
index d9e9f48d..df3aba1f 100644
--- a/Samples/0_Introduction/simpleIPC/simpleIPC_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleIPC/simpleIPC_vs2022.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -63,7 +63,7 @@
$(OutDir)/simpleIPC.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -104,6 +104,6 @@
-
+
diff --git a/Samples/0_Introduction/simpleLayeredTexture/Makefile b/Samples/0_Introduction/simpleLayeredTexture/Makefile
index bd2660f6..eeb6d7ea 100644
--- a/Samples/0_Introduction/simpleLayeredTexture/Makefile
+++ b/Samples/0_Introduction/simpleLayeredTexture/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
# Gencode arguments
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
endif
ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleLayeredTexture/NsightEclipse.xml b/Samples/0_Introduction/simpleLayeredTexture/NsightEclipse.xml
index 91481c99..ff2bc6f0 100644
--- a/Samples/0_Introduction/simpleLayeredTexture/NsightEclipse.xml
+++ b/Samples/0_Introduction/simpleLayeredTexture/NsightEclipse.xml
@@ -3,17 +3,17 @@
simpleLayeredTexture
- cudaFree
+ cudaMemcpy
+ cudaCreateChannelDesc
cudaFreeArray
+ cudaFree
+ cudaPitchedPtr
+ cudaPos
+ cudaDestroyTextureObject
cudaExtent
cudaDeviceSynchronize
- cudaDestroyTextureObject
- cudaPitchedPtr
cudaCreateTextureObject
cudaMalloc
- cudaCreateChannelDesc
- cudaPos
- cudaMemcpy
cudaGetDeviceProperties
@@ -52,6 +52,7 @@
sm80
sm86
sm87
+ sm90
x86_64
diff --git a/Samples/0_Introduction/simpleLayeredTexture/README.md b/Samples/0_Introduction/simpleLayeredTexture/README.md
index 3add6778..5dc0eb71 100644
--- a/Samples/0_Introduction/simpleLayeredTexture/README.md
+++ b/Samples/0_Introduction/simpleLayeredTexture/README.md
@@ -10,7 +10,7 @@ Texture, Volume Processing
## Supported SM Architectures
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
## Supported OSes
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
## CUDA APIs involved
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaFreeArray, cudaExtent, cudaDeviceSynchronize, cudaDestroyTextureObject, cudaPitchedPtr, cudaCreateTextureObject, cudaMalloc, cudaCreateChannelDesc, cudaPos, cudaMemcpy, cudaGetDeviceProperties
+cudaMemcpy, cudaCreateChannelDesc, cudaFreeArray, cudaFree, cudaPitchedPtr, cudaPos, cudaDestroyTextureObject, cudaExtent, cudaDeviceSynchronize, cudaCreateTextureObject, cudaMalloc, cudaGetDeviceProperties
## Prerequisites
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
## Build and Run
diff --git a/Samples/0_Introduction/simpleLayeredTexture/simpleLayeredTexture_vs2017.vcxproj b/Samples/0_Introduction/simpleLayeredTexture/simpleLayeredTexture_vs2017.vcxproj
index 71e3a84c..ee1e3e42 100644
--- a/Samples/0_Introduction/simpleLayeredTexture/simpleLayeredTexture_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleLayeredTexture/simpleLayeredTexture_vs2017.vcxproj
@@ -38,7 +38,7 @@
-
+
@@ -67,7 +67,7 @@
$(OutDir)/simpleLayeredTexture.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -107,6 +107,6 @@
-
+
diff --git a/Samples/0_Introduction/simpleLayeredTexture/simpleLayeredTexture_vs2019.vcxproj b/Samples/0_Introduction/simpleLayeredTexture/simpleLayeredTexture_vs2019.vcxproj
index fe147d3e..3ae1a4f9 100644
--- a/Samples/0_Introduction/simpleLayeredTexture/simpleLayeredTexture_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleLayeredTexture/simpleLayeredTexture_vs2019.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -63,7 +63,7 @@
$(OutDir)/simpleLayeredTexture.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -103,6 +103,6 @@
-
+
diff --git a/Samples/0_Introduction/simpleLayeredTexture/simpleLayeredTexture_vs2022.vcxproj b/Samples/0_Introduction/simpleLayeredTexture/simpleLayeredTexture_vs2022.vcxproj
index a99c2ee3..0d29aae5 100644
--- a/Samples/0_Introduction/simpleLayeredTexture/simpleLayeredTexture_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleLayeredTexture/simpleLayeredTexture_vs2022.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -63,7 +63,7 @@
$(OutDir)/simpleLayeredTexture.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -103,6 +103,6 @@
-
+
diff --git a/Samples/0_Introduction/simpleMPI/Makefile b/Samples/0_Introduction/simpleMPI/Makefile
index 49fc56c3..8726e03d 100644
--- a/Samples/0_Introduction/simpleMPI/Makefile
+++ b/Samples/0_Introduction/simpleMPI/Makefile
@@ -335,9 +335,9 @@ endif
# Gencode arguments
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
endif
ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleMPI/README.md b/Samples/0_Introduction/simpleMPI/README.md
index 6f56a03d..5e0f97fa 100644
--- a/Samples/0_Introduction/simpleMPI/README.md
+++ b/Samples/0_Introduction/simpleMPI/README.md
@@ -10,7 +10,7 @@ CUDA Systems Integration, MPI, Multithreading
## Supported SM Architectures
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
## Supported OSes
@@ -23,14 +23,14 @@ x86_64, ppc64le
## CUDA APIs involved
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMalloc, cudaGetLastError, cudaFree, cudaMemcpy
+cudaMalloc, cudaGetLastError, cudaMemcpy, cudaFree
## Dependencies needed to build/run
[MPI](../../../README.md#mpi)
## Prerequisites
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
## Build and Run
diff --git a/Samples/0_Introduction/simpleMPI/simpleMPI_vs2017.vcxproj b/Samples/0_Introduction/simpleMPI/simpleMPI_vs2017.vcxproj
index 4e1777bc..94e77612 100644
--- a/Samples/0_Introduction/simpleMPI/simpleMPI_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleMPI/simpleMPI_vs2017.vcxproj
@@ -38,7 +38,7 @@
-
+
@@ -67,7 +67,7 @@
$(OutDir)/simpleMPI.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -108,6 +108,6 @@
-
+
diff --git a/Samples/0_Introduction/simpleMPI/simpleMPI_vs2019.vcxproj b/Samples/0_Introduction/simpleMPI/simpleMPI_vs2019.vcxproj
index cf5e568b..97822220 100644
--- a/Samples/0_Introduction/simpleMPI/simpleMPI_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleMPI/simpleMPI_vs2019.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -63,7 +63,7 @@
$(OutDir)/simpleMPI.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -104,6 +104,6 @@
-
+
diff --git a/Samples/0_Introduction/simpleMPI/simpleMPI_vs2022.vcxproj b/Samples/0_Introduction/simpleMPI/simpleMPI_vs2022.vcxproj
index 2959c87e..8f6ea5ae 100644
--- a/Samples/0_Introduction/simpleMPI/simpleMPI_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleMPI/simpleMPI_vs2022.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -63,7 +63,7 @@
$(OutDir)/simpleMPI.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -104,6 +104,6 @@
-
+
diff --git a/Samples/0_Introduction/simpleMultiCopy/Makefile b/Samples/0_Introduction/simpleMultiCopy/Makefile
index 26974b35..d6d253c6 100644
--- a/Samples/0_Introduction/simpleMultiCopy/Makefile
+++ b/Samples/0_Introduction/simpleMultiCopy/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
# Gencode arguments
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
endif
ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleMultiCopy/NsightEclipse.xml b/Samples/0_Introduction/simpleMultiCopy/NsightEclipse.xml
index ca79562c..bb76ce8c 100644
--- a/Samples/0_Introduction/simpleMultiCopy/NsightEclipse.xml
+++ b/Samples/0_Introduction/simpleMultiCopy/NsightEclipse.xml
@@ -3,22 +3,22 @@
simpleMultiCopy
- cudaMemset
- cudaFree
- cudaStreamDestroy
- cudaEventRecord
- cudaStreamCreate
cudaHostAlloc
- cudaEventCreate
- cudaEventElapsedTime
- cudaDeviceSynchronize
- cudaEventSynchronize
- cudaFreeHost
+ cudaStreamDestroy
cudaMalloc
- cudaEventDestroy
- cudaSetDevice
cudaMemcpyAsync
+ cudaFree
+ cudaSetDevice
+ cudaEventSynchronize
+ cudaDeviceSynchronize
+ cudaEventRecord
+ cudaFreeHost
+ cudaMemset
+ cudaEventDestroy
+ cudaEventElapsedTime
+ cudaStreamCreate
cudaGetDeviceProperties
+ cudaEventCreate
whole
@@ -66,6 +66,7 @@
sm80
sm86
sm87
+ sm90
x86_64
diff --git a/Samples/0_Introduction/simpleMultiCopy/README.md b/Samples/0_Introduction/simpleMultiCopy/README.md
index 8f015c26..72404287 100644
--- a/Samples/0_Introduction/simpleMultiCopy/README.md
+++ b/Samples/0_Introduction/simpleMultiCopy/README.md
@@ -10,7 +10,7 @@ CUDA Streams and Events, Asynchronous Data Transfers, Overlap Compute and Copy,
## Supported SM Architectures
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
## Supported OSes
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
## CUDA APIs involved
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemset, cudaFree, cudaStreamDestroy, cudaEventRecord, cudaStreamCreate, cudaHostAlloc, cudaEventCreate, cudaEventElapsedTime, cudaDeviceSynchronize, cudaEventSynchronize, cudaFreeHost, cudaMalloc, cudaEventDestroy, cudaSetDevice, cudaMemcpyAsync, cudaGetDeviceProperties
+cudaHostAlloc, cudaStreamDestroy, cudaMalloc, cudaMemcpyAsync, cudaFree, cudaSetDevice, cudaEventSynchronize, cudaDeviceSynchronize, cudaEventRecord, cudaFreeHost, cudaMemset, cudaEventDestroy, cudaEventElapsedTime, cudaStreamCreate, cudaGetDeviceProperties, cudaEventCreate
## Prerequisites
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
## Build and Run
diff --git a/Samples/0_Introduction/simpleMultiCopy/simpleMultiCopy_vs2017.vcxproj b/Samples/0_Introduction/simpleMultiCopy/simpleMultiCopy_vs2017.vcxproj
index 86ccf67a..8fbcf08c 100644
--- a/Samples/0_Introduction/simpleMultiCopy/simpleMultiCopy_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleMultiCopy/simpleMultiCopy_vs2017.vcxproj
@@ -38,7 +38,7 @@
-
+
@@ -67,7 +67,7 @@
$(OutDir)/simpleMultiCopy.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -107,6 +107,6 @@
-
+
diff --git a/Samples/0_Introduction/simpleMultiCopy/simpleMultiCopy_vs2019.vcxproj b/Samples/0_Introduction/simpleMultiCopy/simpleMultiCopy_vs2019.vcxproj
index d1a1609f..1f77866f 100644
--- a/Samples/0_Introduction/simpleMultiCopy/simpleMultiCopy_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleMultiCopy/simpleMultiCopy_vs2019.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -63,7 +63,7 @@
$(OutDir)/simpleMultiCopy.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -103,6 +103,6 @@
-
+
diff --git a/Samples/0_Introduction/simpleMultiCopy/simpleMultiCopy_vs2022.vcxproj b/Samples/0_Introduction/simpleMultiCopy/simpleMultiCopy_vs2022.vcxproj
index 5251d592..447b6331 100644
--- a/Samples/0_Introduction/simpleMultiCopy/simpleMultiCopy_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleMultiCopy/simpleMultiCopy_vs2022.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -63,7 +63,7 @@
$(OutDir)/simpleMultiCopy.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -103,6 +103,6 @@
-
+
diff --git a/Samples/0_Introduction/simpleMultiGPU/Makefile b/Samples/0_Introduction/simpleMultiGPU/Makefile
index 6db255e4..15d13dde 100644
--- a/Samples/0_Introduction/simpleMultiGPU/Makefile
+++ b/Samples/0_Introduction/simpleMultiGPU/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
# Gencode arguments
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
endif
ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleMultiGPU/NsightEclipse.xml b/Samples/0_Introduction/simpleMultiGPU/NsightEclipse.xml
index 500fc9ea..a1e377e5 100644
--- a/Samples/0_Introduction/simpleMultiGPU/NsightEclipse.xml
+++ b/Samples/0_Introduction/simpleMultiGPU/NsightEclipse.xml
@@ -6,13 +6,13 @@
cudaStreamDestroy
cudaFree
cudaMallocHost
+ cudaSetDevice
+ cudaFreeHost
+ cudaStreamSynchronize
+ cudaMalloc
+ cudaMemcpyAsync
cudaStreamCreate
cudaGetDeviceCount
- cudaFreeHost
- cudaMalloc
- cudaSetDevice
- cudaStreamSynchronize
- cudaMemcpyAsync
whole
@@ -53,6 +53,7 @@
sm80
sm86
sm87
+ sm90
x86_64
diff --git a/Samples/0_Introduction/simpleMultiGPU/README.md b/Samples/0_Introduction/simpleMultiGPU/README.md
index 0f8464c2..284904f8 100644
--- a/Samples/0_Introduction/simpleMultiGPU/README.md
+++ b/Samples/0_Introduction/simpleMultiGPU/README.md
@@ -10,7 +10,7 @@ Asynchronous Data Transfers, CUDA Streams and Events, Multithreading, Multi-GPU
## Supported SM Architectures
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
## Supported OSes
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
## CUDA APIs involved
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaStreamDestroy, cudaFree, cudaMallocHost, cudaStreamCreate, cudaGetDeviceCount, cudaFreeHost, cudaMalloc, cudaSetDevice, cudaStreamSynchronize, cudaMemcpyAsync
+cudaStreamDestroy, cudaFree, cudaMallocHost, cudaSetDevice, cudaFreeHost, cudaStreamSynchronize, cudaMalloc, cudaMemcpyAsync, cudaStreamCreate, cudaGetDeviceCount
## Prerequisites
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
## Build and Run
diff --git a/Samples/0_Introduction/simpleMultiGPU/simpleMultiGPU_vs2017.vcxproj b/Samples/0_Introduction/simpleMultiGPU/simpleMultiGPU_vs2017.vcxproj
index bcc574be..a025b2ec 100644
--- a/Samples/0_Introduction/simpleMultiGPU/simpleMultiGPU_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleMultiGPU/simpleMultiGPU_vs2017.vcxproj
@@ -38,7 +38,7 @@
-
+
@@ -67,7 +67,7 @@
$(OutDir)/simpleMultiGPU.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -107,6 +107,6 @@
-
+
diff --git a/Samples/0_Introduction/simpleMultiGPU/simpleMultiGPU_vs2019.vcxproj b/Samples/0_Introduction/simpleMultiGPU/simpleMultiGPU_vs2019.vcxproj
index 30a6f199..2a6ce253 100644
--- a/Samples/0_Introduction/simpleMultiGPU/simpleMultiGPU_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleMultiGPU/simpleMultiGPU_vs2019.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -63,7 +63,7 @@
$(OutDir)/simpleMultiGPU.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -103,6 +103,6 @@
-
+
diff --git a/Samples/0_Introduction/simpleMultiGPU/simpleMultiGPU_vs2022.vcxproj b/Samples/0_Introduction/simpleMultiGPU/simpleMultiGPU_vs2022.vcxproj
index 6fd4139c..315059ca 100644
--- a/Samples/0_Introduction/simpleMultiGPU/simpleMultiGPU_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleMultiGPU/simpleMultiGPU_vs2022.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -63,7 +63,7 @@
$(OutDir)/simpleMultiGPU.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -103,6 +103,6 @@
-
+
diff --git a/Samples/0_Introduction/simpleOccupancy/Makefile b/Samples/0_Introduction/simpleOccupancy/Makefile
index 85aa3c9f..b735ec0c 100644
--- a/Samples/0_Introduction/simpleOccupancy/Makefile
+++ b/Samples/0_Introduction/simpleOccupancy/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
# Gencode arguments
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
endif
ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleOccupancy/NsightEclipse.xml b/Samples/0_Introduction/simpleOccupancy/NsightEclipse.xml
index 34577846..e4383b1c 100644
--- a/Samples/0_Introduction/simpleOccupancy/NsightEclipse.xml
+++ b/Samples/0_Introduction/simpleOccupancy/NsightEclipse.xml
@@ -3,17 +3,17 @@
simpleOccupancy
- cudaFree
- cudaEventRecord
- cudaOccupancyMaxActiveBlocksPerMultiprocessor
- cudaEventCreate
- cudaOccupancyMaxPotentialBlockSize
- cudaEventElapsedTime
- cudaDeviceSynchronize
- cudaMalloc
cudaMemcpy
- cudaGetDeviceProperties
+ cudaFree
+ cudaDeviceSynchronize
+ cudaEventRecord
cudaGetDevice
+ cudaMalloc
+ cudaEventElapsedTime
+ cudaOccupancyMaxActiveBlocksPerMultiprocessor
+ cudaGetDeviceProperties
+ cudaOccupancyMaxPotentialBlockSize
+ cudaEventCreate
whole
@@ -52,6 +52,7 @@
sm80
sm86
sm87
+ sm90
x86_64
diff --git a/Samples/0_Introduction/simpleOccupancy/README.md b/Samples/0_Introduction/simpleOccupancy/README.md
index 4ca96acf..ddc12f2a 100644
--- a/Samples/0_Introduction/simpleOccupancy/README.md
+++ b/Samples/0_Introduction/simpleOccupancy/README.md
@@ -10,7 +10,7 @@ Occupancy Calculator
## Supported SM Architectures
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
## Supported OSes
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
## CUDA APIs involved
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaEventRecord, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaEventCreate, cudaOccupancyMaxPotentialBlockSize, cudaEventElapsedTime, cudaDeviceSynchronize, cudaMalloc, cudaMemcpy, cudaGetDeviceProperties, cudaGetDevice
+cudaMemcpy, cudaFree, cudaDeviceSynchronize, cudaEventRecord, cudaGetDevice, cudaMalloc, cudaEventElapsedTime, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaGetDeviceProperties, cudaOccupancyMaxPotentialBlockSize, cudaEventCreate
## Prerequisites
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
## Build and Run
diff --git a/Samples/0_Introduction/simpleOccupancy/simpleOccupancy_vs2017.vcxproj b/Samples/0_Introduction/simpleOccupancy/simpleOccupancy_vs2017.vcxproj
index ee3e8ca5..d4d97a02 100644
--- a/Samples/0_Introduction/simpleOccupancy/simpleOccupancy_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleOccupancy/simpleOccupancy_vs2017.vcxproj
@@ -38,7 +38,7 @@
-
+
@@ -67,7 +67,7 @@
$(OutDir)/simpleOccupancy.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -107,6 +107,6 @@
-
+
diff --git a/Samples/0_Introduction/simpleOccupancy/simpleOccupancy_vs2019.vcxproj b/Samples/0_Introduction/simpleOccupancy/simpleOccupancy_vs2019.vcxproj
index a0db9b8e..096cea4a 100644
--- a/Samples/0_Introduction/simpleOccupancy/simpleOccupancy_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleOccupancy/simpleOccupancy_vs2019.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -63,7 +63,7 @@
$(OutDir)/simpleOccupancy.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -103,6 +103,6 @@
-
+
diff --git a/Samples/0_Introduction/simpleOccupancy/simpleOccupancy_vs2022.vcxproj b/Samples/0_Introduction/simpleOccupancy/simpleOccupancy_vs2022.vcxproj
index 312b5e69..57de8a55 100644
--- a/Samples/0_Introduction/simpleOccupancy/simpleOccupancy_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleOccupancy/simpleOccupancy_vs2022.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -63,7 +63,7 @@
$(OutDir)/simpleOccupancy.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -103,6 +103,6 @@
-
+
diff --git a/Samples/0_Introduction/simpleP2P/Makefile b/Samples/0_Introduction/simpleP2P/Makefile
index 036ff0d8..804aa449 100644
--- a/Samples/0_Introduction/simpleP2P/Makefile
+++ b/Samples/0_Introduction/simpleP2P/Makefile
@@ -303,9 +303,9 @@ LIBRARIES :=
# Gencode arguments
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
endif
ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleP2P/NsightEclipse.xml b/Samples/0_Introduction/simpleP2P/NsightEclipse.xml
index 69fc274d..65fe83bb 100644
--- a/Samples/0_Introduction/simpleP2P/NsightEclipse.xml
+++ b/Samples/0_Introduction/simpleP2P/NsightEclipse.xml
@@ -3,23 +3,23 @@
simpleP2P
- cudaDeviceEnablePeerAccess
- cudaFree
- cudaEventRecord
- cudaMallocHost
- cudaGetDeviceCount
- cudaEventElapsedTime
- cudaDeviceSynchronize
- cudaEventSynchronize
- cudaFreeHost
- cudaMalloc
- cudaEventCreateWithFlags
- cudaDeviceCanAccessPeer
- cudaEventDestroy
- cudaSetDevice
- cudaDeviceDisablePeerAccess
cudaMemcpy
+ cudaMalloc
+ cudaFree
+ cudaMallocHost
+ cudaEventCreateWithFlags
+ cudaSetDevice
+ cudaEventSynchronize
+ cudaDeviceDisablePeerAccess
+ cudaGetDeviceCount
+ cudaDeviceSynchronize
+ cudaEventRecord
+ cudaFreeHost
cudaGetDeviceProperties
+ cudaDeviceEnablePeerAccess
+ cudaEventDestroy
+ cudaEventElapsedTime
+ cudaDeviceCanAccessPeer
whole
@@ -67,6 +67,7 @@
sm80
sm86
sm87
+ sm90
x86_64
diff --git a/Samples/0_Introduction/simpleP2P/README.md b/Samples/0_Introduction/simpleP2P/README.md
index cbe3b252..56b4b8bf 100644
--- a/Samples/0_Introduction/simpleP2P/README.md
+++ b/Samples/0_Introduction/simpleP2P/README.md
@@ -10,7 +10,7 @@ Performance Strategies, Asynchronous Data Transfers, Unified Virtual Address Spa
## Supported SM Architectures
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
## Supported OSes
@@ -23,14 +23,14 @@ x86_64, ppc64le
## CUDA APIs involved
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaDeviceEnablePeerAccess, cudaFree, cudaEventRecord, cudaMallocHost, cudaGetDeviceCount, cudaEventElapsedTime, cudaDeviceSynchronize, cudaEventSynchronize, cudaFreeHost, cudaMalloc, cudaEventCreateWithFlags, cudaDeviceCanAccessPeer, cudaEventDestroy, cudaSetDevice, cudaDeviceDisablePeerAccess, cudaMemcpy, cudaGetDeviceProperties
+cudaMemcpy, cudaMalloc, cudaFree, cudaMallocHost, cudaEventCreateWithFlags, cudaSetDevice, cudaEventSynchronize, cudaDeviceDisablePeerAccess, cudaGetDeviceCount, cudaDeviceSynchronize, cudaEventRecord, cudaFreeHost, cudaGetDeviceProperties, cudaDeviceEnablePeerAccess, cudaEventDestroy, cudaEventElapsedTime, cudaDeviceCanAccessPeer
## Dependencies needed to build/run
[only-64-bit](../../../README.md#only-64-bit)
## Prerequisites
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
## Build and Run
diff --git a/Samples/0_Introduction/simpleP2P/simpleP2P_vs2017.vcxproj b/Samples/0_Introduction/simpleP2P/simpleP2P_vs2017.vcxproj
index aea119d9..41efff17 100644
--- a/Samples/0_Introduction/simpleP2P/simpleP2P_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleP2P/simpleP2P_vs2017.vcxproj
@@ -38,7 +38,7 @@
-
+
@@ -67,7 +67,7 @@
$(OutDir)/simpleP2P.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -107,6 +107,6 @@
-
+
diff --git a/Samples/0_Introduction/simpleP2P/simpleP2P_vs2019.vcxproj b/Samples/0_Introduction/simpleP2P/simpleP2P_vs2019.vcxproj
index af3b8074..d51f6d7b 100644
--- a/Samples/0_Introduction/simpleP2P/simpleP2P_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleP2P/simpleP2P_vs2019.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -63,7 +63,7 @@
$(OutDir)/simpleP2P.exe
- compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;
+ compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;
-Xcompiler "/wd 4819" --threads 0
./;../../../Common
WIN32
@@ -103,6 +103,6 @@
-
+
diff --git a/Samples/0_Introduction/simpleP2P/simpleP2P_vs2022.vcxproj b/Samples/0_Introduction/simpleP2P/simpleP2P_vs2022.vcxproj
index ea28f070..9ed32164 100644
--- a/Samples/0_Introduction/simpleP2P/simpleP2P_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleP2P/simpleP2P_vs2022.vcxproj
@@ -34,7 +34,7 @@
-
+
@@ -63,7 +63,7 @@
$(OutDir)/simpleP2P.exe
-