diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000..a3062bea
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+.vscode/*
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4bd17b62..c619c923 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,10 @@
 ## Changelog
 
+### CUDA 11.6
+* Added new folder structure for samples
+* Added support for Visual Studio 2022 to all samples supported on [Windows](README.md#windows-1).
+* All CUDA samples are now only available on [GitHub](https://github.com/nvidia/cuda-samples). They are no longer available via the CUDA Toolkit.
+
 ### CUDA 11.5
 * Added `cuDLAHybridMode`. Demonstrate usage of cuDLA in hybrid mode.
 * Added `cuDLAStandaloneMode`. Demonstrate usage of cuDLA in standalone mode.
diff --git a/Common/helper_cuda.h b/Common/helper_cuda.h
index 98a5a7b6..f6bea97a 100644
--- a/Common/helper_cuda.h
+++ b/Common/helper_cuda.h
@@ -666,6 +666,7 @@ inline int _ConvertSMVer2Cores(int major, int minor) {
       {0x80,  64},
       {0x86, 128},
       {0x87, 128},
+      {0x90, 128},
       {-1, -1}};
 
   int index = 0;
@@ -712,6 +713,8 @@ inline const char* _ConvertSMVer2ArchName(int major, int minor) {
       {0x75, "Turing"},
       {0x80, "Ampere"},
       {0x86, "Ampere"},
+      {0x87, "Ampere"},
+      {0x90, "Hopper"},
       {-1, "Graphics Device"}};
 
   int index = 0;
diff --git a/Common/helper_cuda_drvapi.h b/Common/helper_cuda_drvapi.h
index f0362d64..80979b5b 100644
--- a/Common/helper_cuda_drvapi.h
+++ b/Common/helper_cuda_drvapi.h
@@ -114,6 +114,7 @@ inline int _ConvertSMVer2CoresDRV(int major, int minor) {
       {0x80,  64},
       {0x86, 128},
       {0x87, 128},
+      {0x90, 128},
       {-1, -1}};
 
   int index = 0;
diff --git a/README.md b/README.md
index 7a37e198..354fa6a4 100644
--- a/README.md
+++ b/README.md
@@ -1,12 +1,12 @@
 # CUDA Samples
 
-Samples for CUDA Developers which demonstrates features in CUDA Toolkit. This version supports [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads).
+Samples for CUDA Developers which demonstrate features in the CUDA Toolkit. This version supports [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads).
 
 ## Release Notes
 
 This section describes the release notes for the CUDA Samples on GitHub only.
 
-### CUDA 11.6
+### CUDA 11.8
 * Added new folder structure for samples
 * Added support of Visual Studio 2022 to all samples supported on [Windows](#windows-1).
 * All CUDA samples are now only available on [GitHub](https://github.com/nvidia/cuda-samples). They are no longer available via CUDA toolkit.
@@ -17,7 +17,7 @@ This section describes the release notes for the CUDA Samples on GitHub only.
 
 ### Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 For system requirements and installation instructions of cuda toolkit, please refer to the [Linux Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/), and the [Windows Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-microsoft-windows/index.html).
 
 ### Getting the CUDA Samples
@@ -263,4 +263,4 @@ Answers to frequently asked questions about CUDA can be found at http://develope
 
 ## Attributions
 
-*   Teapot image is obtained from [Wikimedia](https://en.wikipedia.org/wiki/File:Original_Utah_Teapot.jpg) and is licensed under the Creative Commons [Attribution-Share Alike 2.0](https://creativecommons.org/licenses/by-sa/2.0/deed.en) Generic license. The image is modified for samples use cases.
\ No newline at end of file
+*   Teapot image is obtained from [Wikimedia](https://en.wikipedia.org/wiki/File:Original_Utah_Teapot.jpg) and is licensed under the Creative Commons [Attribution-Share Alike 2.0](https://creativecommons.org/licenses/by-sa/2.0/deed.en) Generic license. The image is modified for samples use cases.
diff --git a/Samples/0_Introduction/UnifiedMemoryStreams/Makefile b/Samples/0_Introduction/UnifiedMemoryStreams/Makefile
index ba7b78b4..e244dab5 100644
--- a/Samples/0_Introduction/UnifiedMemoryStreams/Makefile
+++ b/Samples/0_Introduction/UnifiedMemoryStreams/Makefile
@@ -318,9 +318,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/UnifiedMemoryStreams/NsightEclipse.xml b/Samples/0_Introduction/UnifiedMemoryStreams/NsightEclipse.xml
index 033c1c50..744caa12 100644
--- a/Samples/0_Introduction/UnifiedMemoryStreams/NsightEclipse.xml
+++ b/Samples/0_Introduction/UnifiedMemoryStreams/NsightEclipse.xml
@@ -6,11 +6,11 @@
     <toolkit>cudaStreamDestroy</toolkit>
     <toolkit>cudaFree</toolkit>
     <toolkit>cudaMallocManaged</toolkit>
-    <toolkit>cudaStreamCreate</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaStreamAttachMemAsync</toolkit>
     <toolkit>cudaSetDevice</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaStreamSynchronize</toolkit>
+    <toolkit>cudaStreamCreate</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates the use of OpenMP and streams with Unified Memory on a single GPU.]]></description>
@@ -70,6 +70,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/UnifiedMemoryStreams/README.md b/Samples/0_Introduction/UnifiedMemoryStreams/README.md
index 347649da..417cf3a0 100644
--- a/Samples/0_Introduction/UnifiedMemoryStreams/README.md
+++ b/Samples/0_Introduction/UnifiedMemoryStreams/README.md
@@ -10,7 +10,7 @@ CUDA Systems Integration, OpenMP, CUBLAS, Multithreading, Unified Memory, CUDA S
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaStreamDestroy, cudaFree, cudaMallocManaged, cudaStreamCreate, cudaDeviceSynchronize, cudaStreamAttachMemAsync, cudaSetDevice, cudaStreamSynchronize, cudaGetDeviceProperties
+cudaStreamDestroy, cudaFree, cudaMallocManaged, cudaStreamAttachMemAsync, cudaSetDevice, cudaDeviceSynchronize, cudaStreamSynchronize, cudaStreamCreate, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [OpenMP](../../../README.md#openmp), [UVM](../../../README.md#uvm), [CUBLAS](../../../README.md#cublas)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams_vs2017.vcxproj b/Samples/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams_vs2017.vcxproj
index e5e99aac..9680c777 100644
--- a/Samples/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams_vs2017.vcxproj
+++ b/Samples/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/UnifiedMemoryStreams.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams_vs2019.vcxproj b/Samples/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams_vs2019.vcxproj
index 75e8d36d..866e26dc 100644
--- a/Samples/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams_vs2019.vcxproj
+++ b/Samples/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/UnifiedMemoryStreams.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams_vs2022.vcxproj b/Samples/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams_vs2022.vcxproj
index ba409655..07478748 100644
--- a/Samples/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams_vs2022.vcxproj
+++ b/Samples/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/UnifiedMemoryStreams.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/asyncAPI/Makefile b/Samples/0_Introduction/asyncAPI/Makefile
index 885bbc8e..71bb4794 100644
--- a/Samples/0_Introduction/asyncAPI/Makefile
+++ b/Samples/0_Introduction/asyncAPI/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/asyncAPI/NsightEclipse.xml b/Samples/0_Introduction/asyncAPI/NsightEclipse.xml
index 6d0bbc62..d823ac8a 100644
--- a/Samples/0_Introduction/asyncAPI/NsightEclipse.xml
+++ b/Samples/0_Introduction/asyncAPI/NsightEclipse.xml
@@ -3,21 +3,21 @@
 <entry>
   <name>asyncAPI</name>
   <cuda_api_list>
-    <toolkit>cudaMemset</toolkit>
+    <toolkit>cudaProfilerStop</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaMemcpyAsync</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
     <toolkit>cudaMallocHost</toolkit>
     <toolkit>cudaProfilerStart</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
     <toolkit>cudaFreeHost</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaEventQuery</toolkit>
-    <toolkit>cudaProfilerStop</toolkit>
+    <toolkit>cudaMemset</toolkit>
     <toolkit>cudaEventDestroy</toolkit>
-    <toolkit>cudaMemcpyAsync</toolkit>
+    <toolkit>cudaEventQuery</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample illustrates the usage of CUDA events for both GPU timing and overlapping CPU and GPU execution. Events are inserted into a stream of CUDA calls. Since CUDA stream calls are asynchronous, the CPU can perform computations while GPU is executing (including DMA memcopies between the host and device). CPU can query CUDA events to determine whether GPU has completed tasks.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -59,6 +59,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/asyncAPI/README.md b/Samples/0_Introduction/asyncAPI/README.md
index 81da4efc..7f4f3b42 100644
--- a/Samples/0_Introduction/asyncAPI/README.md
+++ b/Samples/0_Introduction/asyncAPI/README.md
@@ -10,7 +10,7 @@ Asynchronous Data Transfers, CUDA Streams and Events
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemset, cudaFree, cudaEventRecord, cudaMallocHost, cudaProfilerStart, cudaEventCreate, cudaEventElapsedTime, cudaDeviceSynchronize, cudaFreeHost, cudaMalloc, cudaEventQuery, cudaProfilerStop, cudaEventDestroy, cudaMemcpyAsync, cudaGetDeviceProperties
+cudaProfilerStop, cudaMalloc, cudaMemcpyAsync, cudaFree, cudaMallocHost, cudaProfilerStart, cudaDeviceSynchronize, cudaEventRecord, cudaFreeHost, cudaMemset, cudaEventDestroy, cudaEventQuery, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/asyncAPI/asyncAPI_vs2017.vcxproj b/Samples/0_Introduction/asyncAPI/asyncAPI_vs2017.vcxproj
index f2de8d87..ccea698d 100644
--- a/Samples/0_Introduction/asyncAPI/asyncAPI_vs2017.vcxproj
+++ b/Samples/0_Introduction/asyncAPI/asyncAPI_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/asyncAPI.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/asyncAPI/asyncAPI_vs2019.vcxproj b/Samples/0_Introduction/asyncAPI/asyncAPI_vs2019.vcxproj
index be2679b8..56489567 100644
--- a/Samples/0_Introduction/asyncAPI/asyncAPI_vs2019.vcxproj
+++ b/Samples/0_Introduction/asyncAPI/asyncAPI_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/asyncAPI.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/asyncAPI/asyncAPI_vs2022.vcxproj b/Samples/0_Introduction/asyncAPI/asyncAPI_vs2022.vcxproj
index 9d98bdff..c4b23b8f 100644
--- a/Samples/0_Introduction/asyncAPI/asyncAPI_vs2022.vcxproj
+++ b/Samples/0_Introduction/asyncAPI/asyncAPI_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/asyncAPI.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/c++11_cuda/Makefile b/Samples/0_Introduction/c++11_cuda/Makefile
index f70e1ad6..d4c77f61 100644
--- a/Samples/0_Introduction/c++11_cuda/Makefile
+++ b/Samples/0_Introduction/c++11_cuda/Makefile
@@ -310,9 +310,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
@@ -363,7 +363,6 @@ run: build
 	$(EXEC) ./c++11_cuda
 
 testrun: build
-	$(EXEC) ./c++11_cuda --dummy-test-param
 
 clean:
 	rm -f c++11_cuda c++11_cuda.o
diff --git a/Samples/0_Introduction/c++11_cuda/NsightEclipse.xml b/Samples/0_Introduction/c++11_cuda/NsightEclipse.xml
index e9acaddc..ccb26ce1 100644
--- a/Samples/0_Introduction/c++11_cuda/NsightEclipse.xml
+++ b/Samples/0_Introduction/c++11_cuda/NsightEclipse.xml
@@ -7,9 +7,9 @@
   </cflags>
   <cuda_api_list>
     <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaMemset</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates C++11 feature support in CUDA. It scans a input text file and prints no. of occurrences of x, y, z, w characters. ]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -31,9 +31,6 @@
   </librarypaths>
   <nsight_eclipse>true</nsight_eclipse>
   <primary_file>c++11_cuda.cu</primary_file>
-  <qatests>
-    <qatest>--dummy-test-param</qatest>
-  </qatests>
   <required_dependencies>
     <dependency>CPP11</dependency>
   </required_dependencies>
@@ -54,6 +51,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/c++11_cuda/README.md b/Samples/0_Introduction/c++11_cuda/README.md
index 0ff9f23e..a889fb7c 100644
--- a/Samples/0_Introduction/c++11_cuda/README.md
+++ b/Samples/0_Introduction/c++11_cuda/README.md
@@ -10,7 +10,7 @@ CPP11 CUDA
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMalloc, cudaMemset, cudaFree, cudaMemcpy
+cudaMalloc, cudaMemcpy, cudaMemset, cudaFree
 
 ## Dependencies needed to build/run
 [CPP11](../../../README.md#cpp11)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/0_Introduction/c++11_cuda/c++11_cuda_vs2017.vcxproj b/Samples/0_Introduction/c++11_cuda/c++11_cuda_vs2017.vcxproj
index 33d8ff13..705e575c 100644
--- a/Samples/0_Introduction/c++11_cuda/c++11_cuda_vs2017.vcxproj
+++ b/Samples/0_Introduction/c++11_cuda/c++11_cuda_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/c++11_cuda.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/c++11_cuda/c++11_cuda_vs2019.vcxproj b/Samples/0_Introduction/c++11_cuda/c++11_cuda_vs2019.vcxproj
index 3d1bc27d..e4e93dee 100644
--- a/Samples/0_Introduction/c++11_cuda/c++11_cuda_vs2019.vcxproj
+++ b/Samples/0_Introduction/c++11_cuda/c++11_cuda_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/c++11_cuda.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/c++11_cuda/c++11_cuda_vs2022.vcxproj b/Samples/0_Introduction/c++11_cuda/c++11_cuda_vs2022.vcxproj
index 9bf5532c..8133b615 100644
--- a/Samples/0_Introduction/c++11_cuda/c++11_cuda_vs2022.vcxproj
+++ b/Samples/0_Introduction/c++11_cuda/c++11_cuda_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/c++11_cuda.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/clock/Makefile b/Samples/0_Introduction/clock/Makefile
index dd832757..df4722cb 100644
--- a/Samples/0_Introduction/clock/Makefile
+++ b/Samples/0_Introduction/clock/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/clock/NsightEclipse.xml b/Samples/0_Introduction/clock/NsightEclipse.xml
index eee903b9..6d8cfb09 100644
--- a/Samples/0_Introduction/clock/NsightEclipse.xml
+++ b/Samples/0_Introduction/clock/NsightEclipse.xml
@@ -4,8 +4,8 @@
   <name>clock</name>
   <cuda_api_list>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaFree</toolkit>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaFree</toolkit>
   </cuda_api_list>
   <description><![CDATA[This example shows how to use the clock function to measure the performance of block of threads of a kernel accurately.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -47,6 +47,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/clock/README.md b/Samples/0_Introduction/clock/README.md
index 98ffd744..11f9afd4 100644
--- a/Samples/0_Introduction/clock/README.md
+++ b/Samples/0_Introduction/clock/README.md
@@ -10,7 +10,7 @@ Performance Strategies
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMalloc, cudaFree, cudaMemcpy
+cudaMalloc, cudaMemcpy, cudaFree
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/clock/clock_vs2017.vcxproj b/Samples/0_Introduction/clock/clock_vs2017.vcxproj
index ba348ad3..2350f365 100644
--- a/Samples/0_Introduction/clock/clock_vs2017.vcxproj
+++ b/Samples/0_Introduction/clock/clock_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/clock.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/clock/clock_vs2019.vcxproj b/Samples/0_Introduction/clock/clock_vs2019.vcxproj
index a20c90b7..6649beca 100644
--- a/Samples/0_Introduction/clock/clock_vs2019.vcxproj
+++ b/Samples/0_Introduction/clock/clock_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/clock.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/clock/clock_vs2022.vcxproj b/Samples/0_Introduction/clock/clock_vs2022.vcxproj
index 10e92347..4cf6b895 100644
--- a/Samples/0_Introduction/clock/clock_vs2022.vcxproj
+++ b/Samples/0_Introduction/clock/clock_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/clock.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/clock_nvrtc/README.md b/Samples/0_Introduction/clock_nvrtc/README.md
index 8f16c6d2..5e1dbf0f 100644
--- a/Samples/0_Introduction/clock_nvrtc/README.md
+++ b/Samples/0_Introduction/clock_nvrtc/README.md
@@ -10,7 +10,7 @@ Performance Strategies, Runtime Compilation
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,7 +23,7 @@ x86_64, ppc64le, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
-cuModuleGetFunction, cuMemAlloc, cuLaunchKernel, cuCtxSynchronize, cuMemFree, cuMemcpyDtoH, cuMemcpyHtoD
+cuMemcpyDtoH, cuLaunchKernel, cuMemcpyHtoD, cuCtxSynchronize, cuMemAlloc, cuMemFree, cuModuleGetFunction
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
 cudaBlockSize, cudaGridSize
@@ -33,7 +33,7 @@ cudaBlockSize, cudaGridSize
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/0_Introduction/clock_nvrtc/clock_nvrtc_vs2017.vcxproj b/Samples/0_Introduction/clock_nvrtc/clock_nvrtc_vs2017.vcxproj
index 03b11e36..ec582a9f 100644
--- a/Samples/0_Introduction/clock_nvrtc/clock_nvrtc_vs2017.vcxproj
+++ b/Samples/0_Introduction/clock_nvrtc/clock_nvrtc_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/clock_nvrtc/clock_nvrtc_vs2019.vcxproj b/Samples/0_Introduction/clock_nvrtc/clock_nvrtc_vs2019.vcxproj
index 80f3f59d..e5b93b60 100644
--- a/Samples/0_Introduction/clock_nvrtc/clock_nvrtc_vs2019.vcxproj
+++ b/Samples/0_Introduction/clock_nvrtc/clock_nvrtc_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/clock_nvrtc/clock_nvrtc_vs2022.vcxproj b/Samples/0_Introduction/clock_nvrtc/clock_nvrtc_vs2022.vcxproj
index 0cf812f4..825d8e05 100644
--- a/Samples/0_Introduction/clock_nvrtc/clock_nvrtc_vs2022.vcxproj
+++ b/Samples/0_Introduction/clock_nvrtc/clock_nvrtc_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/concurrentKernels/Makefile b/Samples/0_Introduction/concurrentKernels/Makefile
index 0073ee5e..e6e4e241 100644
--- a/Samples/0_Introduction/concurrentKernels/Makefile
+++ b/Samples/0_Introduction/concurrentKernels/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/concurrentKernels/NsightEclipse.xml b/Samples/0_Introduction/concurrentKernels/NsightEclipse.xml
index dd564b83..edfb7ff5 100644
--- a/Samples/0_Introduction/concurrentKernels/NsightEclipse.xml
+++ b/Samples/0_Introduction/concurrentKernels/NsightEclipse.xml
@@ -3,22 +3,22 @@
 <entry>
   <name>concurrentKernels</name>
   <cuda_api_list>
-    <toolkit>cudaStreamWaitEvent</toolkit>
     <toolkit>cudaStreamDestroy</toolkit>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
-    <toolkit>cudaMallocHost</toolkit>
-    <toolkit>cudaStreamCreate</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
-    <toolkit>cudaEventSynchronize</toolkit>
-    <toolkit>cudaFreeHost</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaEventCreateWithFlags</toolkit>
-    <toolkit>cudaEventDestroy</toolkit>
     <toolkit>cudaMemcpyAsync</toolkit>
-    <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaMallocHost</toolkit>
+    <toolkit>cudaEventCreateWithFlags</toolkit>
+    <toolkit>cudaEventSynchronize</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
+    <toolkit>cudaFreeHost</toolkit>
     <toolkit>cudaGetDevice</toolkit>
+    <toolkit>cudaStreamWaitEvent</toolkit>
+    <toolkit>cudaEventDestroy</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
+    <toolkit>cudaStreamCreate</toolkit>
+    <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates the use of CUDA streams for concurrent execution of several kernels on GPU device. It also illustrates how to introduce dependencies between CUDA streams with the new cudaStreamWaitEvent function.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -57,6 +57,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/concurrentKernels/README.md b/Samples/0_Introduction/concurrentKernels/README.md
index 96816ffa..f83e3bdd 100644
--- a/Samples/0_Introduction/concurrentKernels/README.md
+++ b/Samples/0_Introduction/concurrentKernels/README.md
@@ -10,7 +10,7 @@ Performance Strategies
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaStreamWaitEvent, cudaStreamDestroy, cudaFree, cudaEventRecord, cudaMallocHost, cudaStreamCreate, cudaEventCreate, cudaEventElapsedTime, cudaEventSynchronize, cudaFreeHost, cudaMalloc, cudaEventCreateWithFlags, cudaEventDestroy, cudaMemcpyAsync, cudaGetDeviceProperties, cudaGetDevice
+cudaStreamDestroy, cudaMalloc, cudaMemcpyAsync, cudaFree, cudaMallocHost, cudaEventCreateWithFlags, cudaEventSynchronize, cudaEventRecord, cudaFreeHost, cudaGetDevice, cudaStreamWaitEvent, cudaEventDestroy, cudaEventElapsedTime, cudaStreamCreate, cudaGetDeviceProperties, cudaEventCreate
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/concurrentKernels/concurrentKernels_vs2017.vcxproj b/Samples/0_Introduction/concurrentKernels/concurrentKernels_vs2017.vcxproj
index bb646789..59cad7eb 100644
--- a/Samples/0_Introduction/concurrentKernels/concurrentKernels_vs2017.vcxproj
+++ b/Samples/0_Introduction/concurrentKernels/concurrentKernels_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/concurrentKernels.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/concurrentKernels/concurrentKernels_vs2019.vcxproj b/Samples/0_Introduction/concurrentKernels/concurrentKernels_vs2019.vcxproj
index 2830f310..faee059d 100644
--- a/Samples/0_Introduction/concurrentKernels/concurrentKernels_vs2019.vcxproj
+++ b/Samples/0_Introduction/concurrentKernels/concurrentKernels_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/concurrentKernels.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/concurrentKernels/concurrentKernels_vs2022.vcxproj b/Samples/0_Introduction/concurrentKernels/concurrentKernels_vs2022.vcxproj
index a528a3fb..abf2d5e0 100644
--- a/Samples/0_Introduction/concurrentKernels/concurrentKernels_vs2022.vcxproj
+++ b/Samples/0_Introduction/concurrentKernels/concurrentKernels_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/concurrentKernels.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/cppIntegration/Makefile b/Samples/0_Introduction/cppIntegration/Makefile
index 19301286..ebe106e2 100644
--- a/Samples/0_Introduction/cppIntegration/Makefile
+++ b/Samples/0_Introduction/cppIntegration/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/cppIntegration/NsightEclipse.xml b/Samples/0_Introduction/cppIntegration/NsightEclipse.xml
index 9f70719b..9b5f9b41 100644
--- a/Samples/0_Introduction/cppIntegration/NsightEclipse.xml
+++ b/Samples/0_Introduction/cppIntegration/NsightEclipse.xml
@@ -4,8 +4,8 @@
   <name>cppIntegration</name>
   <cuda_api_list>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaFree</toolkit>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaFree</toolkit>
   </cuda_api_list>
   <description><![CDATA[This example demonstrates how to integrate CUDA into an existing C++ application, i.e. the CUDA entry point on host side is only a function which is called from C++ code and only the file containing this function is compiled with nvcc. It also demonstrates that vector types can be used from cpp.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -41,6 +41,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/cppIntegration/README.md b/Samples/0_Introduction/cppIntegration/README.md
index 2ba64fd7..4ac48bcf 100644
--- a/Samples/0_Introduction/cppIntegration/README.md
+++ b/Samples/0_Introduction/cppIntegration/README.md
@@ -10,7 +10,7 @@ CPP-CUDA Integration
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMalloc, cudaFree, cudaMemcpy
+cudaMalloc, cudaMemcpy, cudaFree
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/cppIntegration/cppIntegration_vs2017.vcxproj b/Samples/0_Introduction/cppIntegration/cppIntegration_vs2017.vcxproj
index e6846211..4070ae91 100644
--- a/Samples/0_Introduction/cppIntegration/cppIntegration_vs2017.vcxproj
+++ b/Samples/0_Introduction/cppIntegration/cppIntegration_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/cppIntegration.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -109,6 +109,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/cppIntegration/cppIntegration_vs2019.vcxproj b/Samples/0_Introduction/cppIntegration/cppIntegration_vs2019.vcxproj
index a57aa19b..67d587aa 100644
--- a/Samples/0_Introduction/cppIntegration/cppIntegration_vs2019.vcxproj
+++ b/Samples/0_Introduction/cppIntegration/cppIntegration_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/cppIntegration.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/cppIntegration/cppIntegration_vs2022.vcxproj b/Samples/0_Introduction/cppIntegration/cppIntegration_vs2022.vcxproj
index 26e82803..8ed0d991 100644
--- a/Samples/0_Introduction/cppIntegration/cppIntegration_vs2022.vcxproj
+++ b/Samples/0_Introduction/cppIntegration/cppIntegration_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/cppIntegration.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/cppOverload/Makefile b/Samples/0_Introduction/cppOverload/Makefile
index cfd5ec9c..a76aca05 100644
--- a/Samples/0_Introduction/cppOverload/Makefile
+++ b/Samples/0_Introduction/cppOverload/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/cppOverload/NsightEclipse.xml b/Samples/0_Introduction/cppOverload/NsightEclipse.xml
index 8c5b84a7..9ad898be 100644
--- a/Samples/0_Introduction/cppOverload/NsightEclipse.xml
+++ b/Samples/0_Introduction/cppOverload/NsightEclipse.xml
@@ -3,17 +3,17 @@
 <entry>
   <name>cppOverload</name>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaFuncSetCacheConfig</toolkit>
+    <toolkit>cudaFree</toolkit>
     <toolkit>cudaMallocHost</toolkit>
-    <toolkit>cudaFuncGetAttributes</toolkit>
-    <toolkit>cudaGetDeviceCount</toolkit>
+    <toolkit>cudaSetDevice</toolkit>
+    <toolkit>cudaGetDeviceProperties</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaFreeHost</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaSetDevice</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
-    <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaFuncGetAttributes</toolkit>
+    <toolkit>cudaGetDeviceCount</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates how to use C++ function overloading on the GPU.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -52,6 +52,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/cppOverload/README.md b/Samples/0_Introduction/cppOverload/README.md
index 16b6adc1..bc583bfd 100644
--- a/Samples/0_Introduction/cppOverload/README.md
+++ b/Samples/0_Introduction/cppOverload/README.md
@@ -10,7 +10,7 @@ C++ Function Overloading, CUDA Streams and Events
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaFuncSetCacheConfig, cudaMallocHost, cudaFuncGetAttributes, cudaGetDeviceCount, cudaDeviceSynchronize, cudaFreeHost, cudaMalloc, cudaSetDevice, cudaMemcpy, cudaGetDeviceProperties
+cudaMemcpy, cudaFuncSetCacheConfig, cudaFree, cudaMallocHost, cudaSetDevice, cudaGetDeviceProperties, cudaDeviceSynchronize, cudaFreeHost, cudaMalloc, cudaFuncGetAttributes, cudaGetDeviceCount
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/cppOverload/cppOverload_vs2017.vcxproj b/Samples/0_Introduction/cppOverload/cppOverload_vs2017.vcxproj
index 0a082724..4adb6ea7 100644
--- a/Samples/0_Introduction/cppOverload/cppOverload_vs2017.vcxproj
+++ b/Samples/0_Introduction/cppOverload/cppOverload_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/cppOverload.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/cppOverload/cppOverload_vs2019.vcxproj b/Samples/0_Introduction/cppOverload/cppOverload_vs2019.vcxproj
index ba8bee01..040f08cf 100644
--- a/Samples/0_Introduction/cppOverload/cppOverload_vs2019.vcxproj
+++ b/Samples/0_Introduction/cppOverload/cppOverload_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/cppOverload.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/cppOverload/cppOverload_vs2022.vcxproj b/Samples/0_Introduction/cppOverload/cppOverload_vs2022.vcxproj
index 4e849ea5..a9592ff8 100644
--- a/Samples/0_Introduction/cppOverload/cppOverload_vs2022.vcxproj
+++ b/Samples/0_Introduction/cppOverload/cppOverload_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/cppOverload.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/cudaOpenMP/Makefile b/Samples/0_Introduction/cudaOpenMP/Makefile
index 476ddbc9..277357e2 100644
--- a/Samples/0_Introduction/cudaOpenMP/Makefile
+++ b/Samples/0_Introduction/cudaOpenMP/Makefile
@@ -321,9 +321,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/cudaOpenMP/README.md b/Samples/0_Introduction/cudaOpenMP/README.md
index 9f446d04..5a57d918 100644
--- a/Samples/0_Introduction/cudaOpenMP/README.md
+++ b/Samples/0_Introduction/cudaOpenMP/README.md
@@ -10,7 +10,7 @@ CUDA Systems Integration, OpenMP, Multithreading
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemset, cudaFree, cudaGetDeviceCount, cudaSetDevice, cudaMalloc, cudaGetLastError, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties, cudaGetDevice
+cudaMemcpy, cudaGetErrorString, cudaFree, cudaGetLastError, cudaSetDevice, cudaGetDeviceCount, cudaGetDevice, cudaMemset, cudaMalloc, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [OpenMP](../../../README.md#openmp)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/0_Introduction/cudaOpenMP/cudaOpenMP_vs2017.vcxproj b/Samples/0_Introduction/cudaOpenMP/cudaOpenMP_vs2017.vcxproj
index 57636e0c..b6a822e0 100644
--- a/Samples/0_Introduction/cudaOpenMP/cudaOpenMP_vs2017.vcxproj
+++ b/Samples/0_Introduction/cudaOpenMP/cudaOpenMP_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/cudaOpenMP.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/cudaOpenMP/cudaOpenMP_vs2019.vcxproj b/Samples/0_Introduction/cudaOpenMP/cudaOpenMP_vs2019.vcxproj
index 5e7d2b50..991ca21e 100644
--- a/Samples/0_Introduction/cudaOpenMP/cudaOpenMP_vs2019.vcxproj
+++ b/Samples/0_Introduction/cudaOpenMP/cudaOpenMP_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/cudaOpenMP.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/cudaOpenMP/cudaOpenMP_vs2022.vcxproj b/Samples/0_Introduction/cudaOpenMP/cudaOpenMP_vs2022.vcxproj
index 8628c83b..adf14793 100644
--- a/Samples/0_Introduction/cudaOpenMP/cudaOpenMP_vs2022.vcxproj
+++ b/Samples/0_Introduction/cudaOpenMP/cudaOpenMP_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/cudaOpenMP.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/fp16ScalarProduct/Makefile b/Samples/0_Introduction/fp16ScalarProduct/Makefile
index c80fe84a..5dda1a89 100644
--- a/Samples/0_Introduction/fp16ScalarProduct/Makefile
+++ b/Samples/0_Introduction/fp16ScalarProduct/Makefile
@@ -285,9 +285,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 60 61 70 75 80 86
+SMS ?= 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/fp16ScalarProduct/NsightEclipse.xml b/Samples/0_Introduction/fp16ScalarProduct/NsightEclipse.xml
index fed67440..045bce43 100644
--- a/Samples/0_Introduction/fp16ScalarProduct/NsightEclipse.xml
+++ b/Samples/0_Introduction/fp16ScalarProduct/NsightEclipse.xml
@@ -3,11 +3,11 @@
 <entry>
   <name>fp16ScalarProduct</name>
   <cuda_api_list>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaFree</toolkit>
     <toolkit>cudaMallocHost</toolkit>
     <toolkit>cudaFreeHost</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[Calculates scalar product of two vectors of FP16 numbers.]]></description>
@@ -44,6 +44,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>arm</arch>
diff --git a/Samples/0_Introduction/fp16ScalarProduct/README.md b/Samples/0_Introduction/fp16ScalarProduct/README.md
index 3875a40a..4aa2b89c 100644
--- a/Samples/0_Introduction/fp16ScalarProduct/README.md
+++ b/Samples/0_Introduction/fp16ScalarProduct/README.md
@@ -10,7 +10,7 @@ CUDA Runtime API
 
 ## Supported SM Architectures
 
-[SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaMallocHost, cudaFreeHost, cudaMalloc, cudaMemcpy, cudaGetDeviceProperties
+cudaMemcpy, cudaFree, cudaMallocHost, cudaFreeHost, cudaMalloc, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [FP16](../../../README.md#fp16)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/0_Introduction/fp16ScalarProduct/fp16ScalarProduct_vs2017.vcxproj b/Samples/0_Introduction/fp16ScalarProduct/fp16ScalarProduct_vs2017.vcxproj
index f1199a8e..c4dbdc75 100644
--- a/Samples/0_Introduction/fp16ScalarProduct/fp16ScalarProduct_vs2017.vcxproj
+++ b/Samples/0_Introduction/fp16ScalarProduct/fp16ScalarProduct_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/fp16ScalarProduct.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/fp16ScalarProduct/fp16ScalarProduct_vs2019.vcxproj b/Samples/0_Introduction/fp16ScalarProduct/fp16ScalarProduct_vs2019.vcxproj
index bcd5c50c..0b9a749f 100644
--- a/Samples/0_Introduction/fp16ScalarProduct/fp16ScalarProduct_vs2019.vcxproj
+++ b/Samples/0_Introduction/fp16ScalarProduct/fp16ScalarProduct_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/fp16ScalarProduct.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/fp16ScalarProduct/fp16ScalarProduct_vs2022.vcxproj b/Samples/0_Introduction/fp16ScalarProduct/fp16ScalarProduct_vs2022.vcxproj
index c316ee62..ee4258a8 100644
--- a/Samples/0_Introduction/fp16ScalarProduct/fp16ScalarProduct_vs2022.vcxproj
+++ b/Samples/0_Introduction/fp16ScalarProduct/fp16ScalarProduct_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/fp16ScalarProduct.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/matrixMul/Makefile b/Samples/0_Introduction/matrixMul/Makefile
index a8b38ccd..a4d336b5 100644
--- a/Samples/0_Introduction/matrixMul/Makefile
+++ b/Samples/0_Introduction/matrixMul/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/matrixMul/NsightEclipse.xml b/Samples/0_Introduction/matrixMul/NsightEclipse.xml
index e90b6c7e..3f517967 100644
--- a/Samples/0_Introduction/matrixMul/NsightEclipse.xml
+++ b/Samples/0_Introduction/matrixMul/NsightEclipse.xml
@@ -3,20 +3,20 @@
 <entry>
   <name>matrixMul</name>
   <cuda_api_list>
+    <toolkit>cudaStreamCreateWithFlags</toolkit>
+    <toolkit>cudaProfilerStop</toolkit>
+    <toolkit>cudaMalloc</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
     <toolkit>cudaMallocHost</toolkit>
     <toolkit>cudaProfilerStart</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
     <toolkit>cudaEventSynchronize</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
     <toolkit>cudaFreeHost</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaProfilerStop</toolkit>
-    <toolkit>cudaStreamCreateWithFlags</toolkit>
-    <toolkit>cudaEventDestroy</toolkit>
     <toolkit>cudaStreamSynchronize</toolkit>
+    <toolkit>cudaEventDestroy</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
     <toolkit>cudaMemcpyAsync</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample implements matrix multiplication and is exactly the same as Chapter 6 of the programming guide. It has been written for clarity of exposition to illustrate various CUDA programming principles, not with the goal of providing the most performant generic kernel for matrix multiplication.  To illustrate GPU performance for matrix multiply, this sample also shows how to use the new CUDA 4.0 interface for CUBLAS to demonstrate high-performance performance for matrix multiplication.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -56,6 +56,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/matrixMul/README.md b/Samples/0_Introduction/matrixMul/README.md
index c558141a..b0e121b2 100644
--- a/Samples/0_Introduction/matrixMul/README.md
+++ b/Samples/0_Introduction/matrixMul/README.md
@@ -10,7 +10,7 @@ CUDA Runtime API, Linear Algebra
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaEventRecord, cudaMallocHost, cudaProfilerStart, cudaEventCreate, cudaEventElapsedTime, cudaEventSynchronize, cudaFreeHost, cudaMalloc, cudaProfilerStop, cudaStreamCreateWithFlags, cudaEventDestroy, cudaStreamSynchronize, cudaMemcpyAsync
+cudaStreamCreateWithFlags, cudaProfilerStop, cudaMalloc, cudaFree, cudaMallocHost, cudaProfilerStart, cudaEventSynchronize, cudaEventRecord, cudaFreeHost, cudaStreamSynchronize, cudaEventDestroy, cudaEventElapsedTime, cudaMemcpyAsync, cudaEventCreate
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/matrixMul/matrixMul_vs2017.vcxproj b/Samples/0_Introduction/matrixMul/matrixMul_vs2017.vcxproj
index 5bc23eb0..95f6a03a 100644
--- a/Samples/0_Introduction/matrixMul/matrixMul_vs2017.vcxproj
+++ b/Samples/0_Introduction/matrixMul/matrixMul_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/matrixMul.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/matrixMul/matrixMul_vs2019.vcxproj b/Samples/0_Introduction/matrixMul/matrixMul_vs2019.vcxproj
index 7373d385..375f668a 100644
--- a/Samples/0_Introduction/matrixMul/matrixMul_vs2019.vcxproj
+++ b/Samples/0_Introduction/matrixMul/matrixMul_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/matrixMul.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/matrixMul/matrixMul_vs2022.vcxproj b/Samples/0_Introduction/matrixMul/matrixMul_vs2022.vcxproj
index 36e7c4e9..e406cc03 100644
--- a/Samples/0_Introduction/matrixMul/matrixMul_vs2022.vcxproj
+++ b/Samples/0_Introduction/matrixMul/matrixMul_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/matrixMul.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/matrixMulDrv/Makefile b/Samples/0_Introduction/matrixMulDrv/Makefile
index 794345b6..83476982 100644
--- a/Samples/0_Introduction/matrixMulDrv/Makefile
+++ b/Samples/0_Introduction/matrixMulDrv/Makefile
@@ -283,9 +283,9 @@ FATBIN_FILE := matrixMul_kernel${TARGET_SIZE}.fatbin
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/matrixMulDrv/README.md b/Samples/0_Introduction/matrixMulDrv/README.md
index 804e7d81..682fb940 100644
--- a/Samples/0_Introduction/matrixMulDrv/README.md
+++ b/Samples/0_Introduction/matrixMulDrv/README.md
@@ -10,7 +10,7 @@ CUDA Driver API, Matrix Multiply
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
-cuModuleGetFunction, cuMemcpyHtoD, cuModuleLoadData, cuCtxCreate, cuLaunchKernel, cuDeviceGetName, cuMemAlloc, cuOccupancyMaxPotentialBlockSize, cuDeviceTotalMem, cuMemFree, cuMemcpyDtoH, cuCtxDestroy, cuDeviceGetAttribute
+cuMemcpyDtoH, cuLaunchKernel, cuMemcpyHtoD, cuDeviceGetName, cuDeviceTotalMem, cuDeviceGetAttribute, cuModuleLoadData, cuOccupancyMaxPotentialBlockSize, cuMemAlloc, cuMemFree, cuCtxDestroy, cuModuleGetFunction, cuCtxCreate
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/matrixMulDrv/matrixMulDrv_vs2017.vcxproj b/Samples/0_Introduction/matrixMulDrv/matrixMulDrv_vs2017.vcxproj
index ff911452..73998761 100644
--- a/Samples/0_Introduction/matrixMulDrv/matrixMulDrv_vs2017.vcxproj
+++ b/Samples/0_Introduction/matrixMulDrv/matrixMulDrv_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/matrixMulDrv.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -111,6 +111,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/matrixMulDrv/matrixMulDrv_vs2019.vcxproj b/Samples/0_Introduction/matrixMulDrv/matrixMulDrv_vs2019.vcxproj
index 1f6b88ad..0805c97c 100644
--- a/Samples/0_Introduction/matrixMulDrv/matrixMulDrv_vs2019.vcxproj
+++ b/Samples/0_Introduction/matrixMulDrv/matrixMulDrv_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/matrixMulDrv.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/matrixMulDrv/matrixMulDrv_vs2022.vcxproj b/Samples/0_Introduction/matrixMulDrv/matrixMulDrv_vs2022.vcxproj
index e92ce9ce..a82bb699 100644
--- a/Samples/0_Introduction/matrixMulDrv/matrixMulDrv_vs2022.vcxproj
+++ b/Samples/0_Introduction/matrixMulDrv/matrixMulDrv_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/matrixMulDrv.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/matrixMulDynlinkJIT/README.md b/Samples/0_Introduction/matrixMulDynlinkJIT/README.md
index 18350964..657811d3 100644
--- a/Samples/0_Introduction/matrixMulDynlinkJIT/README.md
+++ b/Samples/0_Introduction/matrixMulDynlinkJIT/README.md
@@ -10,7 +10,7 @@ CUDA Driver API, CUDA Dynamically Linked Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
-cuParamSetv, cuMemFree, cuInit, cuModuleGetFunction, cuCtxDestroy, cuCtxCreate, cuDeviceGetName, cuCtxSynchronize, cuParamSeti, cuModuleLoadDataEx, cuDeviceGet, cuFuncSetSharedSize, cuMemAlloc, cuDeviceComputeCapability, cuFuncSetBlockShape, cuMemcpyHtoD, cuParamSetSize, cuLaunchGrid, cuDeviceGetCount, cuLaunchKernel, cuMemcpyDtoH
+cuMemcpyDtoH, cuDeviceGetName, cuParamSeti, cuModuleLoadDataEx, cuModuleGetFunction, cuLaunchGrid, cuFuncSetSharedSize, cuMemFree, cuParamSetSize, cuParamSetv, cuInit, cuMemcpyHtoD, cuLaunchKernel, cuDeviceGet, cuFuncSetBlockShape, cuCtxDestroy, cuDeviceGetCount, cuDeviceComputeCapability, cuCtxSynchronize, cuMemAlloc, cuCtxCreate
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/matrixMulDynlinkJIT/helper_cuda_drvapi.h b/Samples/0_Introduction/matrixMulDynlinkJIT/helper_cuda_drvapi.h
index 5f69d332..4ca66fde 100644
--- a/Samples/0_Introduction/matrixMulDynlinkJIT/helper_cuda_drvapi.h
+++ b/Samples/0_Introduction/matrixMulDynlinkJIT/helper_cuda_drvapi.h
@@ -95,6 +95,7 @@ inline int _ConvertSMVer2CoresDRV(int major, int minor) {
       {0x80,  64},
       {0x86, 128},
       {0x87, 128},
+      {0x90, 128},
       {-1, -1}};
 
   int index = 0;
diff --git a/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMulDynlinkJIT_vs2017.vcxproj b/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMulDynlinkJIT_vs2017.vcxproj
index da13462f..8b146bde 100644
--- a/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMulDynlinkJIT_vs2017.vcxproj
+++ b/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMulDynlinkJIT_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -116,6 +116,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMulDynlinkJIT_vs2019.vcxproj b/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMulDynlinkJIT_vs2019.vcxproj
index 460bc3de..3fc6842c 100644
--- a/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMulDynlinkJIT_vs2019.vcxproj
+++ b/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMulDynlinkJIT_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -112,6 +112,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMulDynlinkJIT_vs2022.vcxproj b/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMulDynlinkJIT_vs2022.vcxproj
index d5ac5358..732e0b22 100644
--- a/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMulDynlinkJIT_vs2022.vcxproj
+++ b/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMulDynlinkJIT_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -112,6 +112,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/matrixMul_nvrtc/README.md b/Samples/0_Introduction/matrixMul_nvrtc/README.md
index 2cefe20e..224c3ee0 100644
--- a/Samples/0_Introduction/matrixMul_nvrtc/README.md
+++ b/Samples/0_Introduction/matrixMul_nvrtc/README.md
@@ -10,7 +10,7 @@ CUDA Runtime API, Linear Algebra, Runtime Compilation
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
-cuModuleGetFunction, cuMemAlloc, cuLaunchKernel, cuCtxSynchronize, cuMemFree, cuMemcpyDtoH, cuMemcpyHtoD
+cuMemcpyDtoH, cuLaunchKernel, cuMemcpyHtoD, cuCtxSynchronize, cuMemAlloc, cuMemFree, cuModuleGetFunction
 
 ## Dependencies needed to build/run
 [NVRTC](../../../README.md#nvrtc)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/0_Introduction/matrixMul_nvrtc/matrixMul_nvrtc_vs2017.vcxproj b/Samples/0_Introduction/matrixMul_nvrtc/matrixMul_nvrtc_vs2017.vcxproj
index 1b4a7eb0..7833bb47 100644
--- a/Samples/0_Introduction/matrixMul_nvrtc/matrixMul_nvrtc_vs2017.vcxproj
+++ b/Samples/0_Introduction/matrixMul_nvrtc/matrixMul_nvrtc_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -113,6 +113,6 @@ xcopy /y /e /s "$(CudaToolkitDir)include\cooperative_groups" .\cooperative_group
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/matrixMul_nvrtc/matrixMul_nvrtc_vs2019.vcxproj b/Samples/0_Introduction/matrixMul_nvrtc/matrixMul_nvrtc_vs2019.vcxproj
index cf0c66c8..d0b58366 100644
--- a/Samples/0_Introduction/matrixMul_nvrtc/matrixMul_nvrtc_vs2019.vcxproj
+++ b/Samples/0_Introduction/matrixMul_nvrtc/matrixMul_nvrtc_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -109,6 +109,6 @@ xcopy /y /e /s "$(CudaToolkitDir)include\cooperative_groups" .\cooperative_group
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/matrixMul_nvrtc/matrixMul_nvrtc_vs2022.vcxproj b/Samples/0_Introduction/matrixMul_nvrtc/matrixMul_nvrtc_vs2022.vcxproj
index f6dc2b6f..6fa7922e 100644
--- a/Samples/0_Introduction/matrixMul_nvrtc/matrixMul_nvrtc_vs2022.vcxproj
+++ b/Samples/0_Introduction/matrixMul_nvrtc/matrixMul_nvrtc_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -109,6 +109,6 @@ xcopy /y /e /s "$(CudaToolkitDir)include\cooperative_groups" .\cooperative_group
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/mergeSort/Makefile b/Samples/0_Introduction/mergeSort/Makefile
index ad45af87..815268b1 100644
--- a/Samples/0_Introduction/mergeSort/Makefile
+++ b/Samples/0_Introduction/mergeSort/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/mergeSort/NsightEclipse.xml b/Samples/0_Introduction/mergeSort/NsightEclipse.xml
index 0a77b65e..55cab906 100644
--- a/Samples/0_Introduction/mergeSort/NsightEclipse.xml
+++ b/Samples/0_Introduction/mergeSort/NsightEclipse.xml
@@ -4,9 +4,9 @@
   <name>mergeSort</name>
   <cuda_api_list>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaFree</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaFree</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample implements a merge sort (also known as Batcher's sort), algorithms belonging to the class of sorting networks. While generally subefficient on large sequences compared to algorithms with better asymptotic algorithmic complexity (i.e. merge sort or radix sort), may be the algorithms of choice for sorting batches of short- to mid-sized (key, value) array pairs. Refer to the excellent tutorial by H. W. Lang http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/networks/indexen.htm]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -46,6 +46,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/mergeSort/README.md b/Samples/0_Introduction/mergeSort/README.md
index d7ab7be2..d0853896 100644
--- a/Samples/0_Introduction/mergeSort/README.md
+++ b/Samples/0_Introduction/mergeSort/README.md
@@ -10,7 +10,7 @@ Data-Parallel Algorithms
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMalloc, cudaFree, cudaDeviceSynchronize, cudaMemcpy
+cudaMalloc, cudaDeviceSynchronize, cudaMemcpy, cudaFree
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/mergeSort/mergeSort_vs2017.vcxproj b/Samples/0_Introduction/mergeSort/mergeSort_vs2017.vcxproj
index 9f5e39b3..0ef07013 100644
--- a/Samples/0_Introduction/mergeSort/mergeSort_vs2017.vcxproj
+++ b/Samples/0_Introduction/mergeSort/mergeSort_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/mergeSort.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -111,6 +111,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/mergeSort/mergeSort_vs2019.vcxproj b/Samples/0_Introduction/mergeSort/mergeSort_vs2019.vcxproj
index 8639bd2e..5796dda5 100644
--- a/Samples/0_Introduction/mergeSort/mergeSort_vs2019.vcxproj
+++ b/Samples/0_Introduction/mergeSort/mergeSort_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/mergeSort.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/mergeSort/mergeSort_vs2022.vcxproj b/Samples/0_Introduction/mergeSort/mergeSort_vs2022.vcxproj
index c38e79d6..ed951e9f 100644
--- a/Samples/0_Introduction/mergeSort/mergeSort_vs2022.vcxproj
+++ b/Samples/0_Introduction/mergeSort/mergeSort_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/mergeSort.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleAWBarrier/Makefile b/Samples/0_Introduction/simpleAWBarrier/Makefile
index cd8dc51d..0fa1e665 100644
--- a/Samples/0_Introduction/simpleAWBarrier/Makefile
+++ b/Samples/0_Introduction/simpleAWBarrier/Makefile
@@ -316,9 +316,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 70 72 75 80 86 87
+SMS ?= 70 72 75 80 86 87 90
 else
-SMS ?= 70 75 80 86
+SMS ?= 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleAWBarrier/NsightEclipse.xml b/Samples/0_Introduction/simpleAWBarrier/NsightEclipse.xml
index e8738f6a..87414f89 100644
--- a/Samples/0_Introduction/simpleAWBarrier/NsightEclipse.xml
+++ b/Samples/0_Introduction/simpleAWBarrier/NsightEclipse.xml
@@ -6,17 +6,17 @@
     <flag>--std=c++11</flag>
   </cflags>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaMallocHost</toolkit>
-    <toolkit>cudaOccupancyMaxActiveBlocksPerMultiprocessor</toolkit>
-    <toolkit>cudaOccupancyMaxPotentialBlockSize</toolkit>
-    <toolkit>cudaDeviceGetAttribute</toolkit>
-    <toolkit>cudaFreeHost</toolkit>
-    <toolkit>cudaMalloc</toolkit>
     <toolkit>cudaStreamCreateWithFlags</toolkit>
-    <toolkit>cudaLaunchCooperativeKernel</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaDeviceGetAttribute</toolkit>
+    <toolkit>cudaMallocHost</toolkit>
+    <toolkit>cudaFreeHost</toolkit>
     <toolkit>cudaStreamSynchronize</toolkit>
+    <toolkit>cudaLaunchCooperativeKernel</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaOccupancyMaxActiveBlocksPerMultiprocessor</toolkit>
     <toolkit>cudaMemcpyAsync</toolkit>
+    <toolkit>cudaOccupancyMaxPotentialBlockSize</toolkit>
   </cuda_api_list>
   <description><![CDATA[A simple demonstration of arrive wait barriers.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -53,6 +53,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/simpleAWBarrier/README.md b/Samples/0_Introduction/simpleAWBarrier/README.md
index d81ac1f8..064db83a 100644
--- a/Samples/0_Introduction/simpleAWBarrier/README.md
+++ b/Samples/0_Introduction/simpleAWBarrier/README.md
@@ -10,7 +10,7 @@ Arrive Wait Barrier
 
 ## Supported SM Architectures
 
-[SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaMallocHost, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaOccupancyMaxPotentialBlockSize, cudaDeviceGetAttribute, cudaFreeHost, cudaMalloc, cudaStreamCreateWithFlags, cudaLaunchCooperativeKernel, cudaStreamSynchronize, cudaMemcpyAsync
+cudaStreamCreateWithFlags, cudaFree, cudaDeviceGetAttribute, cudaMallocHost, cudaFreeHost, cudaStreamSynchronize, cudaLaunchCooperativeKernel, cudaMalloc, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaMemcpyAsync, cudaOccupancyMaxPotentialBlockSize
 
 ## Dependencies needed to build/run
 [CPP11](../../../README.md#cpp11), [MBCG](../../../README.md#mbcg)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/0_Introduction/simpleAWBarrier/simpleAWBarrier_vs2017.vcxproj b/Samples/0_Introduction/simpleAWBarrier/simpleAWBarrier_vs2017.vcxproj
index ea64526a..ed136540 100644
--- a/Samples/0_Introduction/simpleAWBarrier/simpleAWBarrier_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleAWBarrier/simpleAWBarrier_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleAWBarrier.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleAWBarrier/simpleAWBarrier_vs2019.vcxproj b/Samples/0_Introduction/simpleAWBarrier/simpleAWBarrier_vs2019.vcxproj
index aaf046ed..eeddba29 100644
--- a/Samples/0_Introduction/simpleAWBarrier/simpleAWBarrier_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleAWBarrier/simpleAWBarrier_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleAWBarrier.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleAWBarrier/simpleAWBarrier_vs2022.vcxproj b/Samples/0_Introduction/simpleAWBarrier/simpleAWBarrier_vs2022.vcxproj
index 28637338..85eb24bf 100644
--- a/Samples/0_Introduction/simpleAWBarrier/simpleAWBarrier_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleAWBarrier/simpleAWBarrier_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleAWBarrier.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleAssert/Makefile b/Samples/0_Introduction/simpleAssert/Makefile
index fb73574b..bd790aa6 100644
--- a/Samples/0_Introduction/simpleAssert/Makefile
+++ b/Samples/0_Introduction/simpleAssert/Makefile
@@ -285,9 +285,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleAssert/NsightEclipse.xml b/Samples/0_Introduction/simpleAssert/NsightEclipse.xml
index 7f9e81f5..2ba03ec6 100644
--- a/Samples/0_Introduction/simpleAssert/NsightEclipse.xml
+++ b/Samples/0_Introduction/simpleAssert/NsightEclipse.xml
@@ -41,6 +41,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/simpleAssert/README.md b/Samples/0_Introduction/simpleAssert/README.md
index e5fbc1b3..05b753a1 100644
--- a/Samples/0_Introduction/simpleAssert/README.md
+++ b/Samples/0_Introduction/simpleAssert/README.md
@@ -10,7 +10,7 @@ Assert
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -27,7 +27,7 @@ cudaDeviceSynchronize, cudaGetErrorString
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/simpleAssert/simpleAssert_vs2017.vcxproj b/Samples/0_Introduction/simpleAssert/simpleAssert_vs2017.vcxproj
index a033d7b0..731833f9 100644
--- a/Samples/0_Introduction/simpleAssert/simpleAssert_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleAssert/simpleAssert_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleAssert.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleAssert/simpleAssert_vs2019.vcxproj b/Samples/0_Introduction/simpleAssert/simpleAssert_vs2019.vcxproj
index a3233782..88e2fa09 100644
--- a/Samples/0_Introduction/simpleAssert/simpleAssert_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleAssert/simpleAssert_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleAssert.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleAssert/simpleAssert_vs2022.vcxproj b/Samples/0_Introduction/simpleAssert/simpleAssert_vs2022.vcxproj
index de3d8f03..ce935120 100644
--- a/Samples/0_Introduction/simpleAssert/simpleAssert_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleAssert/simpleAssert_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleAssert.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleAssert_nvrtc/README.md b/Samples/0_Introduction/simpleAssert_nvrtc/README.md
index d0ecd7e0..72c5de11 100644
--- a/Samples/0_Introduction/simpleAssert_nvrtc/README.md
+++ b/Samples/0_Introduction/simpleAssert_nvrtc/README.md
@@ -10,7 +10,7 @@ Assert, Runtime Compilation
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
-cuModuleGetFunction, cuCtxSynchronize, cuLaunchKernel
+cuModuleGetFunction, cuLaunchKernel, cuCtxSynchronize
 
 ## Dependencies needed to build/run
 [NVRTC](../../../README.md#nvrtc)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/0_Introduction/simpleAssert_nvrtc/simpleAssert_nvrtc_vs2017.vcxproj b/Samples/0_Introduction/simpleAssert_nvrtc/simpleAssert_nvrtc_vs2017.vcxproj
index 11b8003f..3fc089e5 100644
--- a/Samples/0_Introduction/simpleAssert_nvrtc/simpleAssert_nvrtc_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleAssert_nvrtc/simpleAssert_nvrtc_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleAssert_nvrtc/simpleAssert_nvrtc_vs2019.vcxproj b/Samples/0_Introduction/simpleAssert_nvrtc/simpleAssert_nvrtc_vs2019.vcxproj
index 548b3baf..0714d837 100644
--- a/Samples/0_Introduction/simpleAssert_nvrtc/simpleAssert_nvrtc_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleAssert_nvrtc/simpleAssert_nvrtc_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleAssert_nvrtc/simpleAssert_nvrtc_vs2022.vcxproj b/Samples/0_Introduction/simpleAssert_nvrtc/simpleAssert_nvrtc_vs2022.vcxproj
index 59d571ce..fc010fb0 100644
--- a/Samples/0_Introduction/simpleAssert_nvrtc/simpleAssert_nvrtc_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleAssert_nvrtc/simpleAssert_nvrtc_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics/Makefile b/Samples/0_Introduction/simpleAtomicIntrinsics/Makefile
index 7b221fb2..b7222445 100644
--- a/Samples/0_Introduction/simpleAtomicIntrinsics/Makefile
+++ b/Samples/0_Introduction/simpleAtomicIntrinsics/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics/NsightEclipse.xml b/Samples/0_Introduction/simpleAtomicIntrinsics/NsightEclipse.xml
index 093f108b..e9252d1c 100644
--- a/Samples/0_Introduction/simpleAtomicIntrinsics/NsightEclipse.xml
+++ b/Samples/0_Introduction/simpleAtomicIntrinsics/NsightEclipse.xml
@@ -3,12 +3,12 @@
 <entry>
   <name>simpleAtomicIntrinsics</name>
   <cuda_api_list>
+    <toolkit>cudaStreamCreateWithFlags</toolkit>
     <toolkit>cudaFree</toolkit>
     <toolkit>cudaMallocHost</toolkit>
     <toolkit>cudaFreeHost</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaStreamCreateWithFlags</toolkit>
     <toolkit>cudaStreamSynchronize</toolkit>
+    <toolkit>cudaMalloc</toolkit>
     <toolkit>cudaMemcpyAsync</toolkit>
   </cuda_api_list>
   <description><![CDATA[A simple demonstration of global memory atomic instructions.]]></description>
@@ -48,6 +48,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics/README.md b/Samples/0_Introduction/simpleAtomicIntrinsics/README.md
index 81693b71..0fa52781 100644
--- a/Samples/0_Introduction/simpleAtomicIntrinsics/README.md
+++ b/Samples/0_Introduction/simpleAtomicIntrinsics/README.md
@@ -10,7 +10,7 @@ Atomic Intrinsics
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaMallocHost, cudaFreeHost, cudaMalloc, cudaStreamCreateWithFlags, cudaStreamSynchronize, cudaMemcpyAsync
+cudaStreamCreateWithFlags, cudaFree, cudaMallocHost, cudaFreeHost, cudaStreamSynchronize, cudaMalloc, cudaMemcpyAsync
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_vs2017.vcxproj b/Samples/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_vs2017.vcxproj
index 87308a5d..d122ae68 100644
--- a/Samples/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleAtomicIntrinsics.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_vs2019.vcxproj b/Samples/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_vs2019.vcxproj
index b99f4190..7f05dcc6 100644
--- a/Samples/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleAtomicIntrinsics.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_vs2022.vcxproj b/Samples/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_vs2022.vcxproj
index bb0bb8df..7dd8d89e 100644
--- a/Samples/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleAtomicIntrinsics.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/README.md b/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/README.md
index 0d1700a7..a53e822b 100644
--- a/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/README.md
+++ b/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/README.md
@@ -10,7 +10,7 @@ Atomic Intrinsics, Runtime Compilation
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,7 +23,7 @@ x86_64, ppc64le, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
-cuModuleGetFunction, cuMemAlloc, cuLaunchKernel, cuCtxSynchronize, cuMemFree, cuMemcpyDtoH, cuMemcpyHtoD
+cuMemcpyDtoH, cuLaunchKernel, cuMemcpyHtoD, cuCtxSynchronize, cuMemAlloc, cuMemFree, cuModuleGetFunction
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
 cudaBlockSize, cudaGridSize
@@ -33,7 +33,7 @@ cudaBlockSize, cudaGridSize
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_nvrtc_vs2017.vcxproj b/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_nvrtc_vs2017.vcxproj
index c51f9939..9db171b1 100644
--- a/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_nvrtc_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_nvrtc_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_nvrtc_vs2019.vcxproj b/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_nvrtc_vs2019.vcxproj
index 75b918e7..b43cec91 100644
--- a/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_nvrtc_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_nvrtc_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_nvrtc_vs2022.vcxproj b/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_nvrtc_vs2022.vcxproj
index f7cb9e38..bd705f44 100644
--- a/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_nvrtc_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_nvrtc_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleAttributes/Makefile b/Samples/0_Introduction/simpleAttributes/Makefile
index 00e9c4f0..e685dd69 100644
--- a/Samples/0_Introduction/simpleAttributes/Makefile
+++ b/Samples/0_Introduction/simpleAttributes/Makefile
@@ -285,9 +285,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleAttributes/NsightEclipse.xml b/Samples/0_Introduction/simpleAttributes/NsightEclipse.xml
index 5141efa1..fcad8235 100644
--- a/Samples/0_Introduction/simpleAttributes/NsightEclipse.xml
+++ b/Samples/0_Introduction/simpleAttributes/NsightEclipse.xml
@@ -3,15 +3,15 @@
 <entry>
   <name>simpleAttributes</name>
   <cuda_api_list>
-    <toolkit>cudaDeviceSetLimit</toolkit>
     <toolkit>cudaFree</toolkit>
     <toolkit>cudaMallocHost</toolkit>
-    <toolkit>cudaStreamCreate</toolkit>
     <toolkit>cudaFreeHost</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaStreamSetAttribute</toolkit>
-    <toolkit>cudaMemcpyAsync</toolkit>
     <toolkit>cudaStreamSynchronize</toolkit>
+    <toolkit>cudaStreamSetAttribute</toolkit>
+    <toolkit>cudaDeviceSetLimit</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaMemcpyAsync</toolkit>
+    <toolkit>cudaStreamCreate</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[This CUDA Runtime API sample is a very basic example that implements how to use the stream attributes that affect L2 locality. Performance improvement due to use of L2 access policy window can only be noticed on Compute capability 8.0 or higher.]]></description>
@@ -49,6 +49,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/simpleAttributes/README.md b/Samples/0_Introduction/simpleAttributes/README.md
index e5b6fa87..5dc1787b 100644
--- a/Samples/0_Introduction/simpleAttributes/README.md
+++ b/Samples/0_Introduction/simpleAttributes/README.md
@@ -10,7 +10,7 @@ Attributes usage on stream
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaDeviceSetLimit, cudaFree, cudaMallocHost, cudaStreamCreate, cudaFreeHost, cudaMalloc, cudaStreamSetAttribute, cudaMemcpyAsync, cudaStreamSynchronize, cudaGetDeviceProperties
+cudaFree, cudaMallocHost, cudaFreeHost, cudaStreamSynchronize, cudaStreamSetAttribute, cudaDeviceSetLimit, cudaMalloc, cudaMemcpyAsync, cudaStreamCreate, cudaGetDeviceProperties
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/simpleAttributes/simpleAttributes_vs2017.vcxproj b/Samples/0_Introduction/simpleAttributes/simpleAttributes_vs2017.vcxproj
index 2752b266..a446d3a9 100644
--- a/Samples/0_Introduction/simpleAttributes/simpleAttributes_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleAttributes/simpleAttributes_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleAttributes.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleAttributes/simpleAttributes_vs2019.vcxproj b/Samples/0_Introduction/simpleAttributes/simpleAttributes_vs2019.vcxproj
index 620fdca3..e49167d9 100644
--- a/Samples/0_Introduction/simpleAttributes/simpleAttributes_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleAttributes/simpleAttributes_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleAttributes.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleAttributes/simpleAttributes_vs2022.vcxproj b/Samples/0_Introduction/simpleAttributes/simpleAttributes_vs2022.vcxproj
index 9ab8f931..1eb61252 100644
--- a/Samples/0_Introduction/simpleAttributes/simpleAttributes_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleAttributes/simpleAttributes_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleAttributes.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleCUDA2GL/Makefile b/Samples/0_Introduction/simpleCUDA2GL/Makefile
index 39042604..80e3250f 100644
--- a/Samples/0_Introduction/simpleCUDA2GL/Makefile
+++ b/Samples/0_Introduction/simpleCUDA2GL/Makefile
@@ -311,9 +311,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleCUDA2GL/NsightEclipse.xml b/Samples/0_Introduction/simpleCUDA2GL/NsightEclipse.xml
index 55b9400c..1f40f86b 100644
--- a/Samples/0_Introduction/simpleCUDA2GL/NsightEclipse.xml
+++ b/Samples/0_Introduction/simpleCUDA2GL/NsightEclipse.xml
@@ -3,19 +3,19 @@
 <entry>
   <name>simpleCUDA2GL</name>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaGraphicsMapResources</toolkit>
-    <toolkit>cudaGraphicsUnregisterResource</toolkit>
-    <toolkit>cudaMemcpyToArray</toolkit>
-    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
     <toolkit>cudaHostAlloc</toolkit>
-    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
-    <toolkit>cudaProcess</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaGraphicsSubResourceGetMappedArray</toolkit>
-    <toolkit>cudaGraphicsGLRegisterImage</toolkit>
     <toolkit>cudaGraphicsUnmapResources</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
+    <toolkit>cudaGraphicsMapResources</toolkit>
+    <toolkit>cudaMemcpyToArray</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaProcess</toolkit>
+    <toolkit>cudaGraphicsUnregisterResource</toolkit>
+    <toolkit>cudaGraphicsSubResourceGetMappedArray</toolkit>
+    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
+    <toolkit>cudaGraphicsGLRegisterImage</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample shows how to copy CUDA image back to OpenGL using the most efficient methods.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -79,6 +79,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/simpleCUDA2GL/README.md b/Samples/0_Introduction/simpleCUDA2GL/README.md
index c5f7bb6d..7c46fb7e 100644
--- a/Samples/0_Introduction/simpleCUDA2GL/README.md
+++ b/Samples/0_Introduction/simpleCUDA2GL/README.md
@@ -10,7 +10,7 @@ Graphics Interop, Image Processing, Performance Strategies
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaGraphicsMapResources, cudaGraphicsUnregisterResource, cudaMemcpyToArray, cudaGraphicsGLRegisterBuffer, cudaHostAlloc, cudaGraphicsResourceGetMappedPointer, cudaProcess, cudaDeviceSynchronize, cudaMalloc, cudaGraphicsSubResourceGetMappedArray, cudaGraphicsGLRegisterImage, cudaGraphicsUnmapResources
+cudaHostAlloc, cudaGraphicsUnmapResources, cudaMalloc, cudaFree, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaMemcpyToArray, cudaDeviceSynchronize, cudaProcess, cudaGraphicsUnregisterResource, cudaGraphicsSubResourceGetMappedArray, cudaGraphicsGLRegisterBuffer, cudaGraphicsGLRegisterImage
 
 ## Dependencies needed to build/run
 [X11](../../../README.md#x11), [GL](../../../README.md#gl)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/0_Introduction/simpleCUDA2GL/findgllib.mk b/Samples/0_Introduction/simpleCUDA2GL/findgllib.mk
index f0a5c551..998fcf0f 100644
--- a/Samples/0_Introduction/simpleCUDA2GL/findgllib.mk
+++ b/Samples/0_Introduction/simpleCUDA2GL/findgllib.mk
@@ -53,11 +53,12 @@ endif
 ifeq ("$(TARGET_OS)","linux")
     # $(info) >> findgllib.mk -> LINUX path <<<)
     # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside
-    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu      >/dev/null 2>&1; echo $$?)
-    FEDORA = $(shell echo $(DISTRO) | grep -i fedora      >/dev/null 2>&1; echo $$?)
-    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
-    CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
+    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu       >/dev/null 2>&1; echo $$?)
+    FEDORA = $(shell echo $(DISTRO) | grep -i fedora       >/dev/null 2>&1; echo $$?)
+    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel'  >/dev/null 2>&1; echo $$?)
+    CENTOS = $(shell echo $(DISTRO) | grep -i centos       >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux")
         DFLT_PATH ?= /usr/lib
       endif
     endif
+
     ifeq ("$(SUSE)","0")
       GLPATH    ?= /usr/X11R6/lib64
       GLLINK    ?= -L/usr/X11R6/lib64
       DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(FEDORA)","0")
+    else
       GLPATH    ?= /usr/lib64/nvidia
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
-    ifeq ("$(RHEL)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(CENTOS)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-  
+
   # find libGL, libGLU 
   GLLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so  -print 2>/dev/null)
   GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null)
diff --git a/Samples/0_Introduction/simpleCUDA2GL/simpleCUDA2GL_vs2017.vcxproj b/Samples/0_Introduction/simpleCUDA2GL/simpleCUDA2GL_vs2017.vcxproj
index b1c84ae4..65865117 100644
--- a/Samples/0_Introduction/simpleCUDA2GL/simpleCUDA2GL_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleCUDA2GL/simpleCUDA2GL_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleCUDA2GL.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -118,6 +118,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleCUDA2GL/simpleCUDA2GL_vs2019.vcxproj b/Samples/0_Introduction/simpleCUDA2GL/simpleCUDA2GL_vs2019.vcxproj
index 955b060a..19ad1de2 100644
--- a/Samples/0_Introduction/simpleCUDA2GL/simpleCUDA2GL_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleCUDA2GL/simpleCUDA2GL_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleCUDA2GL.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -114,6 +114,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleCUDA2GL/simpleCUDA2GL_vs2022.vcxproj b/Samples/0_Introduction/simpleCUDA2GL/simpleCUDA2GL_vs2022.vcxproj
index 46e97f62..f2b071ec 100644
--- a/Samples/0_Introduction/simpleCUDA2GL/simpleCUDA2GL_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleCUDA2GL/simpleCUDA2GL_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleCUDA2GL.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -114,6 +114,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleCallback/Makefile b/Samples/0_Introduction/simpleCallback/Makefile
index fade686a..ff334a83 100644
--- a/Samples/0_Introduction/simpleCallback/Makefile
+++ b/Samples/0_Introduction/simpleCallback/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleCallback/NsightEclipse.xml b/Samples/0_Introduction/simpleCallback/NsightEclipse.xml
index 3ee3b90d..931c7c65 100644
--- a/Samples/0_Introduction/simpleCallback/NsightEclipse.xml
+++ b/Samples/0_Introduction/simpleCallback/NsightEclipse.xml
@@ -3,16 +3,16 @@
 <entry>
   <name>simpleCallback</name>
   <cuda_api_list>
+    <toolkit>cudaHostAlloc</toolkit>
     <toolkit>cudaStreamDestroy</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaStreamCreate</toolkit>
-    <toolkit>cudaHostAlloc</toolkit>
+    <toolkit>cudaSetDevice</toolkit>
     <toolkit>cudaGetDeviceCount</toolkit>
     <toolkit>cudaFreeHost</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaSetDevice</toolkit>
-    <toolkit>cudaMemcpyAsync</toolkit>
     <toolkit>cudaStreamAddCallback</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaMemcpyAsync</toolkit>
+    <toolkit>cudaStreamCreate</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample implements multi-threaded heterogeneous computing workloads with the new CPU callbacks for CUDA streams and events introduced with CUDA 5.0.]]></description>
@@ -53,6 +53,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/simpleCallback/README.md b/Samples/0_Introduction/simpleCallback/README.md
index 24c76c25..40368567 100644
--- a/Samples/0_Introduction/simpleCallback/README.md
+++ b/Samples/0_Introduction/simpleCallback/README.md
@@ -10,7 +10,7 @@ CUDA Streams, Callback Functions, Multithreading
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaStreamDestroy, cudaFree, cudaStreamCreate, cudaHostAlloc, cudaGetDeviceCount, cudaFreeHost, cudaMalloc, cudaSetDevice, cudaMemcpyAsync, cudaStreamAddCallback, cudaGetDeviceProperties
+cudaHostAlloc, cudaStreamDestroy, cudaFree, cudaSetDevice, cudaGetDeviceCount, cudaFreeHost, cudaStreamAddCallback, cudaMalloc, cudaMemcpyAsync, cudaStreamCreate, cudaGetDeviceProperties
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/simpleCallback/simpleCallback_vs2017.vcxproj b/Samples/0_Introduction/simpleCallback/simpleCallback_vs2017.vcxproj
index 677ea6b2..ad8bf900 100644
--- a/Samples/0_Introduction/simpleCallback/simpleCallback_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleCallback/simpleCallback_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleCallback.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleCallback/simpleCallback_vs2019.vcxproj b/Samples/0_Introduction/simpleCallback/simpleCallback_vs2019.vcxproj
index 0dcbadea..b200ba6f 100644
--- a/Samples/0_Introduction/simpleCallback/simpleCallback_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleCallback/simpleCallback_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleCallback.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleCallback/simpleCallback_vs2022.vcxproj b/Samples/0_Introduction/simpleCallback/simpleCallback_vs2022.vcxproj
index 6f2e491b..196f5794 100644
--- a/Samples/0_Introduction/simpleCallback/simpleCallback_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleCallback/simpleCallback_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleCallback.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleCooperativeGroups/Makefile b/Samples/0_Introduction/simpleCooperativeGroups/Makefile
index c45b7332..22efbff3 100644
--- a/Samples/0_Introduction/simpleCooperativeGroups/Makefile
+++ b/Samples/0_Introduction/simpleCooperativeGroups/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleCooperativeGroups/NsightEclipse.xml b/Samples/0_Introduction/simpleCooperativeGroups/NsightEclipse.xml
index 54d9c4d3..939f68ad 100644
--- a/Samples/0_Introduction/simpleCooperativeGroups/NsightEclipse.xml
+++ b/Samples/0_Introduction/simpleCooperativeGroups/NsightEclipse.xml
@@ -44,6 +44,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/simpleCooperativeGroups/README.md b/Samples/0_Introduction/simpleCooperativeGroups/README.md
index 7e80f6bc..ab3e11cc 100644
--- a/Samples/0_Introduction/simpleCooperativeGroups/README.md
+++ b/Samples/0_Introduction/simpleCooperativeGroups/README.md
@@ -10,7 +10,7 @@ Cooperative Groups
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -27,7 +27,7 @@ cudaDeviceSynchronize, cudaGetErrorString
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/simpleCooperativeGroups/simpleCooperativeGroups_vs2017.vcxproj b/Samples/0_Introduction/simpleCooperativeGroups/simpleCooperativeGroups_vs2017.vcxproj
index 4a7bac2a..061538d8 100644
--- a/Samples/0_Introduction/simpleCooperativeGroups/simpleCooperativeGroups_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleCooperativeGroups/simpleCooperativeGroups_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleCooperativeGroups.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleCooperativeGroups/simpleCooperativeGroups_vs2019.vcxproj b/Samples/0_Introduction/simpleCooperativeGroups/simpleCooperativeGroups_vs2019.vcxproj
index 09d33159..bf17882a 100644
--- a/Samples/0_Introduction/simpleCooperativeGroups/simpleCooperativeGroups_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleCooperativeGroups/simpleCooperativeGroups_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleCooperativeGroups.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleCooperativeGroups/simpleCooperativeGroups_vs2022.vcxproj b/Samples/0_Introduction/simpleCooperativeGroups/simpleCooperativeGroups_vs2022.vcxproj
index 12759203..649221c2 100644
--- a/Samples/0_Introduction/simpleCooperativeGroups/simpleCooperativeGroups_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleCooperativeGroups/simpleCooperativeGroups_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleCooperativeGroups.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleCubemapTexture/Makefile b/Samples/0_Introduction/simpleCubemapTexture/Makefile
index fdff3980..4c1fed17 100644
--- a/Samples/0_Introduction/simpleCubemapTexture/Makefile
+++ b/Samples/0_Introduction/simpleCubemapTexture/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleCubemapTexture/NsightEclipse.xml b/Samples/0_Introduction/simpleCubemapTexture/NsightEclipse.xml
index 625ed7a5..1bf6b010 100644
--- a/Samples/0_Introduction/simpleCubemapTexture/NsightEclipse.xml
+++ b/Samples/0_Introduction/simpleCubemapTexture/NsightEclipse.xml
@@ -3,17 +3,17 @@
 <entry>
   <name>simpleCubemapTexture</name>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaCreateChannelDesc</toolkit>
     <toolkit>cudaFreeArray</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaPitchedPtr</toolkit>
+    <toolkit>cudaPos</toolkit>
+    <toolkit>cudaDestroyTextureObject</toolkit>
     <toolkit>cudaExtent</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaDestroyTextureObject</toolkit>
-    <toolkit>cudaPitchedPtr</toolkit>
     <toolkit>cudaCreateTextureObject</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaCreateChannelDesc</toolkit>
-    <toolkit>cudaPos</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[Simple example that demonstrates how to use a new CUDA 4.1 feature to support cubemap Textures in CUDA C.]]></description>
@@ -52,6 +52,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/simpleCubemapTexture/README.md b/Samples/0_Introduction/simpleCubemapTexture/README.md
index 68f06947..44c3896f 100644
--- a/Samples/0_Introduction/simpleCubemapTexture/README.md
+++ b/Samples/0_Introduction/simpleCubemapTexture/README.md
@@ -10,7 +10,7 @@ Texture, Volume Processing
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaFreeArray, cudaExtent, cudaDeviceSynchronize, cudaDestroyTextureObject, cudaPitchedPtr, cudaCreateTextureObject, cudaMalloc, cudaCreateChannelDesc, cudaPos, cudaMemcpy, cudaGetDeviceProperties
+cudaMemcpy, cudaCreateChannelDesc, cudaFreeArray, cudaFree, cudaPitchedPtr, cudaPos, cudaDestroyTextureObject, cudaExtent, cudaDeviceSynchronize, cudaCreateTextureObject, cudaMalloc, cudaGetDeviceProperties
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/simpleCubemapTexture/simpleCubemapTexture_vs2017.vcxproj b/Samples/0_Introduction/simpleCubemapTexture/simpleCubemapTexture_vs2017.vcxproj
index 26ae9423..307c5282 100644
--- a/Samples/0_Introduction/simpleCubemapTexture/simpleCubemapTexture_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleCubemapTexture/simpleCubemapTexture_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleCubemapTexture.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleCubemapTexture/simpleCubemapTexture_vs2019.vcxproj b/Samples/0_Introduction/simpleCubemapTexture/simpleCubemapTexture_vs2019.vcxproj
index bcdaec2f..709f0362 100644
--- a/Samples/0_Introduction/simpleCubemapTexture/simpleCubemapTexture_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleCubemapTexture/simpleCubemapTexture_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleCubemapTexture.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleCubemapTexture/simpleCubemapTexture_vs2022.vcxproj b/Samples/0_Introduction/simpleCubemapTexture/simpleCubemapTexture_vs2022.vcxproj
index 0dd40bfd..3c332afb 100644
--- a/Samples/0_Introduction/simpleCubemapTexture/simpleCubemapTexture_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleCubemapTexture/simpleCubemapTexture_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleCubemapTexture.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleDrvRuntime/Makefile b/Samples/0_Introduction/simpleDrvRuntime/Makefile
index 3cbc5811..46593a89 100644
--- a/Samples/0_Introduction/simpleDrvRuntime/Makefile
+++ b/Samples/0_Introduction/simpleDrvRuntime/Makefile
@@ -283,9 +283,9 @@ FATBIN_FILE := vectorAdd_kernel${TARGET_SIZE}.fatbin
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleDrvRuntime/README.md b/Samples/0_Introduction/simpleDrvRuntime/README.md
index 74ea4ad9..158157e8 100644
--- a/Samples/0_Introduction/simpleDrvRuntime/README.md
+++ b/Samples/0_Introduction/simpleDrvRuntime/README.md
@@ -10,7 +10,7 @@ CUDA Driver API, CUDA Runtime API, Vector Addition
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
-cuCtxDestroy, cuModuleLoadData, cuCtxCreate, cuLaunchKernel, cuModuleUnload, cuInit, cuModuleGetFunction
+cuLaunchKernel, cuModuleLoadData, cuCtxDestroy, cuModuleUnload, cuModuleGetFunction, cuCtxCreate, cuInit
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaMallocHost, cudaFreeHost, cudaMalloc, cudaStreamCreateWithFlags, cudaStreamSynchronize, cudaMemcpyAsync
+cudaStreamCreateWithFlags, cudaFree, cudaMallocHost, cudaFreeHost, cudaStreamSynchronize, cudaMalloc, cudaMemcpyAsync
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/simpleDrvRuntime/simpleDrvRuntime_vs2017.vcxproj b/Samples/0_Introduction/simpleDrvRuntime/simpleDrvRuntime_vs2017.vcxproj
index 92e427cd..019fc0cc 100644
--- a/Samples/0_Introduction/simpleDrvRuntime/simpleDrvRuntime_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleDrvRuntime/simpleDrvRuntime_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleDrvRuntime.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -111,6 +111,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleDrvRuntime/simpleDrvRuntime_vs2019.vcxproj b/Samples/0_Introduction/simpleDrvRuntime/simpleDrvRuntime_vs2019.vcxproj
index 64f8fab9..727c658d 100644
--- a/Samples/0_Introduction/simpleDrvRuntime/simpleDrvRuntime_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleDrvRuntime/simpleDrvRuntime_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleDrvRuntime.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleDrvRuntime/simpleDrvRuntime_vs2022.vcxproj b/Samples/0_Introduction/simpleDrvRuntime/simpleDrvRuntime_vs2022.vcxproj
index 7f3d2b01..93b2ffad 100644
--- a/Samples/0_Introduction/simpleDrvRuntime/simpleDrvRuntime_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleDrvRuntime/simpleDrvRuntime_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleDrvRuntime.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleHyperQ/Makefile b/Samples/0_Introduction/simpleHyperQ/Makefile
index 48018511..16140688 100644
--- a/Samples/0_Introduction/simpleHyperQ/Makefile
+++ b/Samples/0_Introduction/simpleHyperQ/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleHyperQ/NsightEclipse.xml b/Samples/0_Introduction/simpleHyperQ/NsightEclipse.xml
index 8777270a..1a503845 100644
--- a/Samples/0_Introduction/simpleHyperQ/NsightEclipse.xml
+++ b/Samples/0_Introduction/simpleHyperQ/NsightEclipse.xml
@@ -3,20 +3,20 @@
 <entry>
   <name>simpleHyperQ</name>
   <cuda_api_list>
-    <toolkit>cudaStreamDestroy</toolkit>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
-    <toolkit>cudaMallocHost</toolkit>
-    <toolkit>cudaStreamCreate</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
-    <toolkit>cudaEventSynchronize</toolkit>
-    <toolkit>cudaFreeHost</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaEventDestroy</toolkit>
     <toolkit>cudaMemcpy</toolkit>
-    <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaStreamDestroy</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaMallocHost</toolkit>
+    <toolkit>cudaEventSynchronize</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
+    <toolkit>cudaFreeHost</toolkit>
     <toolkit>cudaGetDevice</toolkit>
+    <toolkit>cudaEventDestroy</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
+    <toolkit>cudaStreamCreate</toolkit>
+    <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates the use of CUDA streams for concurrent execution of several kernels on devices which provide HyperQ (SM 3.5).  Devices without HyperQ (SM 2.0 and SM 3.0) will run a maximum of two kernels concurrently.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -62,6 +62,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/simpleHyperQ/README.md b/Samples/0_Introduction/simpleHyperQ/README.md
index 8527317e..467bc4b3 100644
--- a/Samples/0_Introduction/simpleHyperQ/README.md
+++ b/Samples/0_Introduction/simpleHyperQ/README.md
@@ -10,7 +10,7 @@ CUDA Systems Integration, Performance Strategies
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaStreamDestroy, cudaFree, cudaEventRecord, cudaMallocHost, cudaStreamCreate, cudaEventCreate, cudaEventElapsedTime, cudaEventSynchronize, cudaFreeHost, cudaMalloc, cudaEventDestroy, cudaMemcpy, cudaGetDeviceProperties, cudaGetDevice
+cudaMemcpy, cudaStreamDestroy, cudaMalloc, cudaFree, cudaMallocHost, cudaEventSynchronize, cudaEventRecord, cudaFreeHost, cudaGetDevice, cudaEventDestroy, cudaEventElapsedTime, cudaStreamCreate, cudaGetDeviceProperties, cudaEventCreate
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/simpleHyperQ/simpleHyperQ_vs2017.vcxproj b/Samples/0_Introduction/simpleHyperQ/simpleHyperQ_vs2017.vcxproj
index 1e4bedc3..d2bbd16a 100644
--- a/Samples/0_Introduction/simpleHyperQ/simpleHyperQ_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleHyperQ/simpleHyperQ_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleHyperQ.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleHyperQ/simpleHyperQ_vs2019.vcxproj b/Samples/0_Introduction/simpleHyperQ/simpleHyperQ_vs2019.vcxproj
index b9180fa0..3a6cc72c 100644
--- a/Samples/0_Introduction/simpleHyperQ/simpleHyperQ_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleHyperQ/simpleHyperQ_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleHyperQ.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleHyperQ/simpleHyperQ_vs2022.vcxproj b/Samples/0_Introduction/simpleHyperQ/simpleHyperQ_vs2022.vcxproj
index b641b971..c15d7eec 100644
--- a/Samples/0_Introduction/simpleHyperQ/simpleHyperQ_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleHyperQ/simpleHyperQ_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleHyperQ.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleIPC/Makefile b/Samples/0_Introduction/simpleIPC/Makefile
index 31945c1e..914f1ab6 100644
--- a/Samples/0_Introduction/simpleIPC/Makefile
+++ b/Samples/0_Introduction/simpleIPC/Makefile
@@ -303,9 +303,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleIPC/NsightEclipse.xml b/Samples/0_Introduction/simpleIPC/NsightEclipse.xml
index 23e3c5a7..d25608df 100644
--- a/Samples/0_Introduction/simpleIPC/NsightEclipse.xml
+++ b/Samples/0_Introduction/simpleIPC/NsightEclipse.xml
@@ -3,29 +3,29 @@
 <entry>
   <name>simpleIPC</name>
   <cuda_api_list>
-    <toolkit>cudaDeviceEnablePeerAccess</toolkit>
-    <toolkit>cudaIpcOpenEventHandle</toolkit>
-    <toolkit>cudaOccupancyMaxActiveBlocksPerMultiprocessor</toolkit>
-    <toolkit>cudaStreamCreateWithFlags</toolkit>
-    <toolkit>cudaDeviceCanAccessPeer</toolkit>
-    <toolkit>cudaMemcpyAsync</toolkit>
-    <toolkit>cudaStreamDestroy</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaEventDestroy</toolkit>
     <toolkit>cudaSetDevice</toolkit>
-    <toolkit>cudaIpcOpenMemHandle</toolkit>
-    <toolkit>cudaGetDeviceProperties</toolkit>
-    <toolkit>cudaGetDeviceCount</toolkit>
-    <toolkit>cudaIpcGetEventHandle</toolkit>
-    <toolkit>cudaGetLastError</toolkit>
-    <toolkit>cudaStreamSynchronize</toolkit>
-    <toolkit>cudaStreamWaitEvent</toolkit>
-    <toolkit>cudaFree</toolkit>
     <toolkit>cudaIpcCloseMemHandle</toolkit>
+    <toolkit>cudaEventDestroy</toolkit>
+    <toolkit>cudaGetDeviceCount</toolkit>
+    <toolkit>cudaMemcpyAsync</toolkit>
+    <toolkit>cudaDeviceCanAccessPeer</toolkit>
+    <toolkit>cudaStreamCreateWithFlags</toolkit>
+    <toolkit>cudaStreamDestroy</toolkit>
+    <toolkit>cudaGetLastError</toolkit>
+    <toolkit>cudaIpcOpenEventHandle</toolkit>
+    <toolkit>cudaIpcOpenMemHandle</toolkit>
+    <toolkit>cudaIpcGetEventHandle</toolkit>
+    <toolkit>cudaStreamWaitEvent</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaEventSynchronize</toolkit>
     <toolkit>cudaEventRecord</toolkit>
     <toolkit>cudaIpcGetMemHandle</toolkit>
-    <toolkit>cudaEventSynchronize</toolkit>
+    <toolkit>cudaStreamSynchronize</toolkit>
+    <toolkit>cudaDeviceEnablePeerAccess</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaOccupancyMaxActiveBlocksPerMultiprocessor</toolkit>
+    <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[This CUDA Runtime API sample is a very basic sample that demonstrates Inter Process Communication with one process per GPU for computation.  Requires Compute Capability 3.0 or higher and a Linux Operating System, or a Windows Operating System.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -71,6 +71,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <sources>
     <extracompilation>../../../Common/helper_multiprocess.cpp</extracompilation>
     <extraheader>../../../Common/helper_multiprocess.h</extraheader>
diff --git a/Samples/0_Introduction/simpleIPC/README.md b/Samples/0_Introduction/simpleIPC/README.md
index 1594c529..a9d3336b 100644
--- a/Samples/0_Introduction/simpleIPC/README.md
+++ b/Samples/0_Introduction/simpleIPC/README.md
@@ -10,7 +10,7 @@ CUDA Systems Integration, Peer to Peer, InterProcess Communication
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaDeviceEnablePeerAccess, cudaIpcOpenEventHandle, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaStreamCreateWithFlags, cudaDeviceCanAccessPeer, cudaMemcpyAsync, cudaStreamDestroy, cudaEventCreate, cudaMalloc, cudaEventDestroy, cudaSetDevice, cudaIpcOpenMemHandle, cudaGetDeviceProperties, cudaGetDeviceCount, cudaIpcGetEventHandle, cudaGetLastError, cudaStreamSynchronize, cudaStreamWaitEvent, cudaFree, cudaIpcCloseMemHandle, cudaEventRecord, cudaIpcGetMemHandle, cudaEventSynchronize
+cudaSetDevice, cudaIpcCloseMemHandle, cudaEventDestroy, cudaGetDeviceCount, cudaMemcpyAsync, cudaDeviceCanAccessPeer, cudaStreamCreateWithFlags, cudaStreamDestroy, cudaGetLastError, cudaIpcOpenEventHandle, cudaIpcOpenMemHandle, cudaIpcGetEventHandle, cudaStreamWaitEvent, cudaEventCreate, cudaFree, cudaEventSynchronize, cudaEventRecord, cudaIpcGetMemHandle, cudaStreamSynchronize, cudaDeviceEnablePeerAccess, cudaMalloc, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [IPC](../../../README.md#ipc)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/0_Introduction/simpleIPC/simpleIPC_vs2017.vcxproj b/Samples/0_Introduction/simpleIPC/simpleIPC_vs2017.vcxproj
index 99d342ac..03771430 100644
--- a/Samples/0_Introduction/simpleIPC/simpleIPC_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleIPC/simpleIPC_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleIPC.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleIPC/simpleIPC_vs2019.vcxproj b/Samples/0_Introduction/simpleIPC/simpleIPC_vs2019.vcxproj
index 1e507919..4d8096a2 100644
--- a/Samples/0_Introduction/simpleIPC/simpleIPC_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleIPC/simpleIPC_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleIPC.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleIPC/simpleIPC_vs2022.vcxproj b/Samples/0_Introduction/simpleIPC/simpleIPC_vs2022.vcxproj
index d9e9f48d..df3aba1f 100644
--- a/Samples/0_Introduction/simpleIPC/simpleIPC_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleIPC/simpleIPC_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleIPC.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleLayeredTexture/Makefile b/Samples/0_Introduction/simpleLayeredTexture/Makefile
index bd2660f6..eeb6d7ea 100644
--- a/Samples/0_Introduction/simpleLayeredTexture/Makefile
+++ b/Samples/0_Introduction/simpleLayeredTexture/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleLayeredTexture/NsightEclipse.xml b/Samples/0_Introduction/simpleLayeredTexture/NsightEclipse.xml
index 91481c99..ff2bc6f0 100644
--- a/Samples/0_Introduction/simpleLayeredTexture/NsightEclipse.xml
+++ b/Samples/0_Introduction/simpleLayeredTexture/NsightEclipse.xml
@@ -3,17 +3,17 @@
 <entry>
   <name>simpleLayeredTexture</name>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaCreateChannelDesc</toolkit>
     <toolkit>cudaFreeArray</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaPitchedPtr</toolkit>
+    <toolkit>cudaPos</toolkit>
+    <toolkit>cudaDestroyTextureObject</toolkit>
     <toolkit>cudaExtent</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaDestroyTextureObject</toolkit>
-    <toolkit>cudaPitchedPtr</toolkit>
     <toolkit>cudaCreateTextureObject</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaCreateChannelDesc</toolkit>
-    <toolkit>cudaPos</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[Simple example that demonstrates how to use a new CUDA 4.0 feature to support layered Textures in CUDA C.]]></description>
@@ -52,6 +52,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/simpleLayeredTexture/README.md b/Samples/0_Introduction/simpleLayeredTexture/README.md
index 3add6778..5dc0eb71 100644
--- a/Samples/0_Introduction/simpleLayeredTexture/README.md
+++ b/Samples/0_Introduction/simpleLayeredTexture/README.md
@@ -10,7 +10,7 @@ Texture, Volume Processing
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaFreeArray, cudaExtent, cudaDeviceSynchronize, cudaDestroyTextureObject, cudaPitchedPtr, cudaCreateTextureObject, cudaMalloc, cudaCreateChannelDesc, cudaPos, cudaMemcpy, cudaGetDeviceProperties
+cudaMemcpy, cudaCreateChannelDesc, cudaFreeArray, cudaFree, cudaPitchedPtr, cudaPos, cudaDestroyTextureObject, cudaExtent, cudaDeviceSynchronize, cudaCreateTextureObject, cudaMalloc, cudaGetDeviceProperties
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/simpleLayeredTexture/simpleLayeredTexture_vs2017.vcxproj b/Samples/0_Introduction/simpleLayeredTexture/simpleLayeredTexture_vs2017.vcxproj
index 71e3a84c..ee1e3e42 100644
--- a/Samples/0_Introduction/simpleLayeredTexture/simpleLayeredTexture_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleLayeredTexture/simpleLayeredTexture_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleLayeredTexture.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleLayeredTexture/simpleLayeredTexture_vs2019.vcxproj b/Samples/0_Introduction/simpleLayeredTexture/simpleLayeredTexture_vs2019.vcxproj
index fe147d3e..3ae1a4f9 100644
--- a/Samples/0_Introduction/simpleLayeredTexture/simpleLayeredTexture_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleLayeredTexture/simpleLayeredTexture_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleLayeredTexture.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleLayeredTexture/simpleLayeredTexture_vs2022.vcxproj b/Samples/0_Introduction/simpleLayeredTexture/simpleLayeredTexture_vs2022.vcxproj
index a99c2ee3..0d29aae5 100644
--- a/Samples/0_Introduction/simpleLayeredTexture/simpleLayeredTexture_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleLayeredTexture/simpleLayeredTexture_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleLayeredTexture.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleMPI/Makefile b/Samples/0_Introduction/simpleMPI/Makefile
index 49fc56c3..8726e03d 100644
--- a/Samples/0_Introduction/simpleMPI/Makefile
+++ b/Samples/0_Introduction/simpleMPI/Makefile
@@ -335,9 +335,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleMPI/README.md b/Samples/0_Introduction/simpleMPI/README.md
index 6f56a03d..5e0f97fa 100644
--- a/Samples/0_Introduction/simpleMPI/README.md
+++ b/Samples/0_Introduction/simpleMPI/README.md
@@ -10,7 +10,7 @@ CUDA Systems Integration, MPI, Multithreading
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMalloc, cudaGetLastError, cudaFree, cudaMemcpy
+cudaMalloc, cudaGetLastError, cudaMemcpy, cudaFree
 
 ## Dependencies needed to build/run
 [MPI](../../../README.md#mpi)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/0_Introduction/simpleMPI/simpleMPI_vs2017.vcxproj b/Samples/0_Introduction/simpleMPI/simpleMPI_vs2017.vcxproj
index 4e1777bc..94e77612 100644
--- a/Samples/0_Introduction/simpleMPI/simpleMPI_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleMPI/simpleMPI_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleMPI.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleMPI/simpleMPI_vs2019.vcxproj b/Samples/0_Introduction/simpleMPI/simpleMPI_vs2019.vcxproj
index cf5e568b..97822220 100644
--- a/Samples/0_Introduction/simpleMPI/simpleMPI_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleMPI/simpleMPI_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleMPI.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleMPI/simpleMPI_vs2022.vcxproj b/Samples/0_Introduction/simpleMPI/simpleMPI_vs2022.vcxproj
index 2959c87e..8f6ea5ae 100644
--- a/Samples/0_Introduction/simpleMPI/simpleMPI_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleMPI/simpleMPI_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleMPI.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleMultiCopy/Makefile b/Samples/0_Introduction/simpleMultiCopy/Makefile
index 26974b35..d6d253c6 100644
--- a/Samples/0_Introduction/simpleMultiCopy/Makefile
+++ b/Samples/0_Introduction/simpleMultiCopy/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleMultiCopy/NsightEclipse.xml b/Samples/0_Introduction/simpleMultiCopy/NsightEclipse.xml
index ca79562c..bb76ce8c 100644
--- a/Samples/0_Introduction/simpleMultiCopy/NsightEclipse.xml
+++ b/Samples/0_Introduction/simpleMultiCopy/NsightEclipse.xml
@@ -3,22 +3,22 @@
 <entry>
   <name>simpleMultiCopy</name>
   <cuda_api_list>
-    <toolkit>cudaMemset</toolkit>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaStreamDestroy</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
-    <toolkit>cudaStreamCreate</toolkit>
     <toolkit>cudaHostAlloc</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaEventSynchronize</toolkit>
-    <toolkit>cudaFreeHost</toolkit>
+    <toolkit>cudaStreamDestroy</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaEventDestroy</toolkit>
-    <toolkit>cudaSetDevice</toolkit>
     <toolkit>cudaMemcpyAsync</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaSetDevice</toolkit>
+    <toolkit>cudaEventSynchronize</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
+    <toolkit>cudaFreeHost</toolkit>
+    <toolkit>cudaMemset</toolkit>
+    <toolkit>cudaEventDestroy</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
+    <toolkit>cudaStreamCreate</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[Supported in GPUs with Compute Capability 1.1, overlapping compute with one memcopy is possible from the host system.  For Quadro and Tesla GPUs with Compute Capability 2.0, a second overlapped copy operation in either direction at full speed is possible (PCI-e is symmetric).  This sample illustrates the usage of CUDA streams to achieve overlapping of kernel execution with data copies to and from the device.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -66,6 +66,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/simpleMultiCopy/README.md b/Samples/0_Introduction/simpleMultiCopy/README.md
index 8f015c26..72404287 100644
--- a/Samples/0_Introduction/simpleMultiCopy/README.md
+++ b/Samples/0_Introduction/simpleMultiCopy/README.md
@@ -10,7 +10,7 @@ CUDA Streams and Events, Asynchronous Data Transfers, Overlap Compute and Copy,
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemset, cudaFree, cudaStreamDestroy, cudaEventRecord, cudaStreamCreate, cudaHostAlloc, cudaEventCreate, cudaEventElapsedTime, cudaDeviceSynchronize, cudaEventSynchronize, cudaFreeHost, cudaMalloc, cudaEventDestroy, cudaSetDevice, cudaMemcpyAsync, cudaGetDeviceProperties
+cudaHostAlloc, cudaStreamDestroy, cudaMalloc, cudaMemcpyAsync, cudaFree, cudaSetDevice, cudaEventSynchronize, cudaDeviceSynchronize, cudaEventRecord, cudaFreeHost, cudaMemset, cudaEventDestroy, cudaEventElapsedTime, cudaStreamCreate, cudaGetDeviceProperties, cudaEventCreate
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/simpleMultiCopy/simpleMultiCopy_vs2017.vcxproj b/Samples/0_Introduction/simpleMultiCopy/simpleMultiCopy_vs2017.vcxproj
index 86ccf67a..8fbcf08c 100644
--- a/Samples/0_Introduction/simpleMultiCopy/simpleMultiCopy_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleMultiCopy/simpleMultiCopy_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleMultiCopy.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleMultiCopy/simpleMultiCopy_vs2019.vcxproj b/Samples/0_Introduction/simpleMultiCopy/simpleMultiCopy_vs2019.vcxproj
index d1a1609f..1f77866f 100644
--- a/Samples/0_Introduction/simpleMultiCopy/simpleMultiCopy_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleMultiCopy/simpleMultiCopy_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleMultiCopy.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleMultiCopy/simpleMultiCopy_vs2022.vcxproj b/Samples/0_Introduction/simpleMultiCopy/simpleMultiCopy_vs2022.vcxproj
index 5251d592..447b6331 100644
--- a/Samples/0_Introduction/simpleMultiCopy/simpleMultiCopy_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleMultiCopy/simpleMultiCopy_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleMultiCopy.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleMultiGPU/Makefile b/Samples/0_Introduction/simpleMultiGPU/Makefile
index 6db255e4..15d13dde 100644
--- a/Samples/0_Introduction/simpleMultiGPU/Makefile
+++ b/Samples/0_Introduction/simpleMultiGPU/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleMultiGPU/NsightEclipse.xml b/Samples/0_Introduction/simpleMultiGPU/NsightEclipse.xml
index 500fc9ea..a1e377e5 100644
--- a/Samples/0_Introduction/simpleMultiGPU/NsightEclipse.xml
+++ b/Samples/0_Introduction/simpleMultiGPU/NsightEclipse.xml
@@ -6,13 +6,13 @@
     <toolkit>cudaStreamDestroy</toolkit>
     <toolkit>cudaFree</toolkit>
     <toolkit>cudaMallocHost</toolkit>
+    <toolkit>cudaSetDevice</toolkit>
+    <toolkit>cudaFreeHost</toolkit>
+    <toolkit>cudaStreamSynchronize</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaMemcpyAsync</toolkit>
     <toolkit>cudaStreamCreate</toolkit>
     <toolkit>cudaGetDeviceCount</toolkit>
-    <toolkit>cudaFreeHost</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaSetDevice</toolkit>
-    <toolkit>cudaStreamSynchronize</toolkit>
-    <toolkit>cudaMemcpyAsync</toolkit>
   </cuda_api_list>
   <description><![CDATA[This application demonstrates how to use the new CUDA 4.0 API for CUDA context management and multi-threaded access to run CUDA kernels on multiple-GPUs.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -53,6 +53,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/simpleMultiGPU/README.md b/Samples/0_Introduction/simpleMultiGPU/README.md
index 0f8464c2..284904f8 100644
--- a/Samples/0_Introduction/simpleMultiGPU/README.md
+++ b/Samples/0_Introduction/simpleMultiGPU/README.md
@@ -10,7 +10,7 @@ Asynchronous Data Transfers, CUDA Streams and Events, Multithreading, Multi-GPU
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaStreamDestroy, cudaFree, cudaMallocHost, cudaStreamCreate, cudaGetDeviceCount, cudaFreeHost, cudaMalloc, cudaSetDevice, cudaStreamSynchronize, cudaMemcpyAsync
+cudaStreamDestroy, cudaFree, cudaMallocHost, cudaSetDevice, cudaFreeHost, cudaStreamSynchronize, cudaMalloc, cudaMemcpyAsync, cudaStreamCreate, cudaGetDeviceCount
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/simpleMultiGPU/simpleMultiGPU_vs2017.vcxproj b/Samples/0_Introduction/simpleMultiGPU/simpleMultiGPU_vs2017.vcxproj
index bcc574be..a025b2ec 100644
--- a/Samples/0_Introduction/simpleMultiGPU/simpleMultiGPU_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleMultiGPU/simpleMultiGPU_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleMultiGPU.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleMultiGPU/simpleMultiGPU_vs2019.vcxproj b/Samples/0_Introduction/simpleMultiGPU/simpleMultiGPU_vs2019.vcxproj
index 30a6f199..2a6ce253 100644
--- a/Samples/0_Introduction/simpleMultiGPU/simpleMultiGPU_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleMultiGPU/simpleMultiGPU_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleMultiGPU.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleMultiGPU/simpleMultiGPU_vs2022.vcxproj b/Samples/0_Introduction/simpleMultiGPU/simpleMultiGPU_vs2022.vcxproj
index 6fd4139c..315059ca 100644
--- a/Samples/0_Introduction/simpleMultiGPU/simpleMultiGPU_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleMultiGPU/simpleMultiGPU_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleMultiGPU.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleOccupancy/Makefile b/Samples/0_Introduction/simpleOccupancy/Makefile
index 85aa3c9f..b735ec0c 100644
--- a/Samples/0_Introduction/simpleOccupancy/Makefile
+++ b/Samples/0_Introduction/simpleOccupancy/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleOccupancy/NsightEclipse.xml b/Samples/0_Introduction/simpleOccupancy/NsightEclipse.xml
index 34577846..e4383b1c 100644
--- a/Samples/0_Introduction/simpleOccupancy/NsightEclipse.xml
+++ b/Samples/0_Introduction/simpleOccupancy/NsightEclipse.xml
@@ -3,17 +3,17 @@
 <entry>
   <name>simpleOccupancy</name>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
-    <toolkit>cudaOccupancyMaxActiveBlocksPerMultiprocessor</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaOccupancyMaxPotentialBlockSize</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaMalloc</toolkit>
     <toolkit>cudaMemcpy</toolkit>
-    <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
     <toolkit>cudaGetDevice</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
+    <toolkit>cudaOccupancyMaxActiveBlocksPerMultiprocessor</toolkit>
+    <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaOccupancyMaxPotentialBlockSize</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates the basic usage of the CUDA occupancy calculator and occupancy-based launch configurator APIs by launching a kernel with the launch configurator, and measures the utilization difference against a manually configured launch.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -52,6 +52,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/simpleOccupancy/README.md b/Samples/0_Introduction/simpleOccupancy/README.md
index 4ca96acf..ddc12f2a 100644
--- a/Samples/0_Introduction/simpleOccupancy/README.md
+++ b/Samples/0_Introduction/simpleOccupancy/README.md
@@ -10,7 +10,7 @@ Occupancy Calculator
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaEventRecord, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaEventCreate, cudaOccupancyMaxPotentialBlockSize, cudaEventElapsedTime, cudaDeviceSynchronize, cudaMalloc, cudaMemcpy, cudaGetDeviceProperties, cudaGetDevice
+cudaMemcpy, cudaFree, cudaDeviceSynchronize, cudaEventRecord, cudaGetDevice, cudaMalloc, cudaEventElapsedTime, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaGetDeviceProperties, cudaOccupancyMaxPotentialBlockSize, cudaEventCreate
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/simpleOccupancy/simpleOccupancy_vs2017.vcxproj b/Samples/0_Introduction/simpleOccupancy/simpleOccupancy_vs2017.vcxproj
index ee3e8ca5..d4d97a02 100644
--- a/Samples/0_Introduction/simpleOccupancy/simpleOccupancy_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleOccupancy/simpleOccupancy_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleOccupancy.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleOccupancy/simpleOccupancy_vs2019.vcxproj b/Samples/0_Introduction/simpleOccupancy/simpleOccupancy_vs2019.vcxproj
index a0db9b8e..096cea4a 100644
--- a/Samples/0_Introduction/simpleOccupancy/simpleOccupancy_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleOccupancy/simpleOccupancy_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleOccupancy.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleOccupancy/simpleOccupancy_vs2022.vcxproj b/Samples/0_Introduction/simpleOccupancy/simpleOccupancy_vs2022.vcxproj
index 312b5e69..57de8a55 100644
--- a/Samples/0_Introduction/simpleOccupancy/simpleOccupancy_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleOccupancy/simpleOccupancy_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleOccupancy.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleP2P/Makefile b/Samples/0_Introduction/simpleP2P/Makefile
index 036ff0d8..804aa449 100644
--- a/Samples/0_Introduction/simpleP2P/Makefile
+++ b/Samples/0_Introduction/simpleP2P/Makefile
@@ -303,9 +303,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleP2P/NsightEclipse.xml b/Samples/0_Introduction/simpleP2P/NsightEclipse.xml
index 69fc274d..65fe83bb 100644
--- a/Samples/0_Introduction/simpleP2P/NsightEclipse.xml
+++ b/Samples/0_Introduction/simpleP2P/NsightEclipse.xml
@@ -3,23 +3,23 @@
 <entry>
   <name>simpleP2P</name>
   <cuda_api_list>
-    <toolkit>cudaDeviceEnablePeerAccess</toolkit>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
-    <toolkit>cudaMallocHost</toolkit>
-    <toolkit>cudaGetDeviceCount</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaEventSynchronize</toolkit>
-    <toolkit>cudaFreeHost</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaEventCreateWithFlags</toolkit>
-    <toolkit>cudaDeviceCanAccessPeer</toolkit>
-    <toolkit>cudaEventDestroy</toolkit>
-    <toolkit>cudaSetDevice</toolkit>
-    <toolkit>cudaDeviceDisablePeerAccess</toolkit>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaMallocHost</toolkit>
+    <toolkit>cudaEventCreateWithFlags</toolkit>
+    <toolkit>cudaSetDevice</toolkit>
+    <toolkit>cudaEventSynchronize</toolkit>
+    <toolkit>cudaDeviceDisablePeerAccess</toolkit>
+    <toolkit>cudaGetDeviceCount</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
+    <toolkit>cudaFreeHost</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaDeviceEnablePeerAccess</toolkit>
+    <toolkit>cudaEventDestroy</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
+    <toolkit>cudaDeviceCanAccessPeer</toolkit>
   </cuda_api_list>
   <description><![CDATA[This application demonstrates CUDA APIs that support Peer-To-Peer (P2P) copies, Peer-To-Peer (P2P) addressing, and Unified Virtual Memory Addressing (UVA) between multiple GPUs. In general, P2P is supported between two same GPUs with some exceptions, such as some Tesla and Quadro GPUs.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -67,6 +67,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/simpleP2P/README.md b/Samples/0_Introduction/simpleP2P/README.md
index cbe3b252..56b4b8bf 100644
--- a/Samples/0_Introduction/simpleP2P/README.md
+++ b/Samples/0_Introduction/simpleP2P/README.md
@@ -10,7 +10,7 @@ Performance Strategies, Asynchronous Data Transfers, Unified Virtual Address Spa
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaDeviceEnablePeerAccess, cudaFree, cudaEventRecord, cudaMallocHost, cudaGetDeviceCount, cudaEventElapsedTime, cudaDeviceSynchronize, cudaEventSynchronize, cudaFreeHost, cudaMalloc, cudaEventCreateWithFlags, cudaDeviceCanAccessPeer, cudaEventDestroy, cudaSetDevice, cudaDeviceDisablePeerAccess, cudaMemcpy, cudaGetDeviceProperties
+cudaMemcpy, cudaMalloc, cudaFree, cudaMallocHost, cudaEventCreateWithFlags, cudaSetDevice, cudaEventSynchronize, cudaDeviceDisablePeerAccess, cudaGetDeviceCount, cudaDeviceSynchronize, cudaEventRecord, cudaFreeHost, cudaGetDeviceProperties, cudaDeviceEnablePeerAccess, cudaEventDestroy, cudaEventElapsedTime, cudaDeviceCanAccessPeer
 
 ## Dependencies needed to build/run
 [only-64-bit](../../../README.md#only-64-bit)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/0_Introduction/simpleP2P/simpleP2P_vs2017.vcxproj b/Samples/0_Introduction/simpleP2P/simpleP2P_vs2017.vcxproj
index aea119d9..41efff17 100644
--- a/Samples/0_Introduction/simpleP2P/simpleP2P_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleP2P/simpleP2P_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleP2P.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleP2P/simpleP2P_vs2019.vcxproj b/Samples/0_Introduction/simpleP2P/simpleP2P_vs2019.vcxproj
index af3b8074..d51f6d7b 100644
--- a/Samples/0_Introduction/simpleP2P/simpleP2P_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleP2P/simpleP2P_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleP2P.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleP2P/simpleP2P_vs2022.vcxproj b/Samples/0_Introduction/simpleP2P/simpleP2P_vs2022.vcxproj
index ea28f070..9ed32164 100644
--- a/Samples/0_Introduction/simpleP2P/simpleP2P_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleP2P/simpleP2P_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleP2P.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simplePitchLinearTexture/Makefile b/Samples/0_Introduction/simplePitchLinearTexture/Makefile
index 32d993f2..98218e83 100644
--- a/Samples/0_Introduction/simplePitchLinearTexture/Makefile
+++ b/Samples/0_Introduction/simplePitchLinearTexture/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simplePitchLinearTexture/NsightEclipse.xml b/Samples/0_Introduction/simplePitchLinearTexture/NsightEclipse.xml
index e1082063..be7882b8 100644
--- a/Samples/0_Introduction/simplePitchLinearTexture/NsightEclipse.xml
+++ b/Samples/0_Introduction/simplePitchLinearTexture/NsightEclipse.xml
@@ -3,18 +3,18 @@
 <entry>
   <name>simplePitchLinearTexture</name>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaMemcpyToArray</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
+    <toolkit>cudaMallocArray</toolkit>
     <toolkit>cudaFreeArray</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaMallocPitch</toolkit>
     <toolkit>cudaDestroyTextureObject</toolkit>
     <toolkit>cudaEventSynchronize</toolkit>
-    <toolkit>cudaMallocPitch</toolkit>
+    <toolkit>cudaMemcpyToArray</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
     <toolkit>cudaCreateTextureObject</toolkit>
     <toolkit>cudaEventDestroy</toolkit>
-    <toolkit>cudaMallocArray</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[Use of Pitch Linear Textures]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -54,6 +54,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/simplePitchLinearTexture/README.md b/Samples/0_Introduction/simplePitchLinearTexture/README.md
index dc437cf2..95944a88 100644
--- a/Samples/0_Introduction/simplePitchLinearTexture/README.md
+++ b/Samples/0_Introduction/simplePitchLinearTexture/README.md
@@ -10,7 +10,7 @@ Texture, Image Processing
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaMemcpyToArray, cudaEventRecord, cudaFreeArray, cudaEventCreate, cudaEventElapsedTime, cudaDestroyTextureObject, cudaEventSynchronize, cudaMallocPitch, cudaCreateTextureObject, cudaEventDestroy, cudaMallocArray
+cudaMallocArray, cudaFreeArray, cudaFree, cudaMallocPitch, cudaDestroyTextureObject, cudaEventSynchronize, cudaMemcpyToArray, cudaEventRecord, cudaCreateTextureObject, cudaEventDestroy, cudaEventElapsedTime, cudaEventCreate
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/simplePitchLinearTexture/simplePitchLinearTexture_vs2017.vcxproj b/Samples/0_Introduction/simplePitchLinearTexture/simplePitchLinearTexture_vs2017.vcxproj
index ae30718f..f33a061e 100644
--- a/Samples/0_Introduction/simplePitchLinearTexture/simplePitchLinearTexture_vs2017.vcxproj
+++ b/Samples/0_Introduction/simplePitchLinearTexture/simplePitchLinearTexture_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simplePitchLinearTexture.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simplePitchLinearTexture/simplePitchLinearTexture_vs2019.vcxproj b/Samples/0_Introduction/simplePitchLinearTexture/simplePitchLinearTexture_vs2019.vcxproj
index 60bf7a61..c08e4de3 100644
--- a/Samples/0_Introduction/simplePitchLinearTexture/simplePitchLinearTexture_vs2019.vcxproj
+++ b/Samples/0_Introduction/simplePitchLinearTexture/simplePitchLinearTexture_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simplePitchLinearTexture.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simplePitchLinearTexture/simplePitchLinearTexture_vs2022.vcxproj b/Samples/0_Introduction/simplePitchLinearTexture/simplePitchLinearTexture_vs2022.vcxproj
index e964badc..441ae1c9 100644
--- a/Samples/0_Introduction/simplePitchLinearTexture/simplePitchLinearTexture_vs2022.vcxproj
+++ b/Samples/0_Introduction/simplePitchLinearTexture/simplePitchLinearTexture_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simplePitchLinearTexture.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simplePrintf/Makefile b/Samples/0_Introduction/simplePrintf/Makefile
index e1297aa6..3b8cf8a0 100644
--- a/Samples/0_Introduction/simplePrintf/Makefile
+++ b/Samples/0_Introduction/simplePrintf/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simplePrintf/README.md b/Samples/0_Introduction/simplePrintf/README.md
index fd29cf26..872faf89 100644
--- a/Samples/0_Introduction/simplePrintf/README.md
+++ b/Samples/0_Introduction/simplePrintf/README.md
@@ -10,7 +10,7 @@ Debugging
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaDeviceSynchronize, cudaGetDeviceProperties, cudaGetDevice
+cudaGetDeviceProperties, cudaDeviceSynchronize, cudaGetDevice
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/simplePrintf/simplePrintf_vs2017.vcxproj b/Samples/0_Introduction/simplePrintf/simplePrintf_vs2017.vcxproj
index e06b0f93..861f30b9 100644
--- a/Samples/0_Introduction/simplePrintf/simplePrintf_vs2017.vcxproj
+++ b/Samples/0_Introduction/simplePrintf/simplePrintf_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simplePrintf.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simplePrintf/simplePrintf_vs2019.vcxproj b/Samples/0_Introduction/simplePrintf/simplePrintf_vs2019.vcxproj
index 84c7cfbb..6dcb3c5e 100644
--- a/Samples/0_Introduction/simplePrintf/simplePrintf_vs2019.vcxproj
+++ b/Samples/0_Introduction/simplePrintf/simplePrintf_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simplePrintf.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simplePrintf/simplePrintf_vs2022.vcxproj b/Samples/0_Introduction/simplePrintf/simplePrintf_vs2022.vcxproj
index 039c5fb2..e45b5953 100644
--- a/Samples/0_Introduction/simplePrintf/simplePrintf_vs2022.vcxproj
+++ b/Samples/0_Introduction/simplePrintf/simplePrintf_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simplePrintf.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleSeparateCompilation/Makefile b/Samples/0_Introduction/simpleSeparateCompilation/Makefile
index 59116ae7..4a92e480 100644
--- a/Samples/0_Introduction/simpleSeparateCompilation/Makefile
+++ b/Samples/0_Introduction/simpleSeparateCompilation/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleSeparateCompilation/NsightEclipse.xml b/Samples/0_Introduction/simpleSeparateCompilation/NsightEclipse.xml
index 586ab477..2f1c15ab 100644
--- a/Samples/0_Introduction/simpleSeparateCompilation/NsightEclipse.xml
+++ b/Samples/0_Introduction/simpleSeparateCompilation/NsightEclipse.xml
@@ -12,11 +12,11 @@
     </static>
   </compilations>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaGetLastError</toolkit>
     <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaMemcpyFromSymbol</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaGetLastError</toolkit>
+    <toolkit>cudaMalloc</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates a CUDA 5.0 feature, the ability to create a GPU device static library and use it within another CUDA kernel.  This example demonstrates how to pass in a GPU device function (from the GPU device static library) as a function pointer to be called.  This sample requires devices with compute capability 2.0 or higher.]]></description>
   <devicecompilation>separate</devicecompilation>
@@ -56,6 +56,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/simpleSeparateCompilation/README.md b/Samples/0_Introduction/simpleSeparateCompilation/README.md
index b83c4f5c..a4b54487 100644
--- a/Samples/0_Introduction/simpleSeparateCompilation/README.md
+++ b/Samples/0_Introduction/simpleSeparateCompilation/README.md
@@ -10,7 +10,7 @@ Separate Compilation
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaMalloc, cudaGetLastError, cudaMemcpy, cudaMemcpyFromSymbol
+cudaMemcpy, cudaMemcpyFromSymbol, cudaFree, cudaGetLastError, cudaMalloc
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/simpleSeparateCompilation/simpleSeparateCompilation_vs2017.vcxproj b/Samples/0_Introduction/simpleSeparateCompilation/simpleSeparateCompilation_vs2017.vcxproj
index f8293c63..fc05d0f3 100644
--- a/Samples/0_Introduction/simpleSeparateCompilation/simpleSeparateCompilation_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleSeparateCompilation/simpleSeparateCompilation_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleSeparateCompilation.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -109,6 +109,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleSeparateCompilation/simpleSeparateCompilation_vs2019.vcxproj b/Samples/0_Introduction/simpleSeparateCompilation/simpleSeparateCompilation_vs2019.vcxproj
index db4e0716..d6f50cf4 100644
--- a/Samples/0_Introduction/simpleSeparateCompilation/simpleSeparateCompilation_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleSeparateCompilation/simpleSeparateCompilation_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleSeparateCompilation.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleSeparateCompilation/simpleSeparateCompilation_vs2022.vcxproj b/Samples/0_Introduction/simpleSeparateCompilation/simpleSeparateCompilation_vs2022.vcxproj
index 07e55c2b..758766ef 100644
--- a/Samples/0_Introduction/simpleSeparateCompilation/simpleSeparateCompilation_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleSeparateCompilation/simpleSeparateCompilation_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleSeparateCompilation.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleStreams/Makefile b/Samples/0_Introduction/simpleStreams/Makefile
index 0c088686..0e83a307 100644
--- a/Samples/0_Introduction/simpleStreams/Makefile
+++ b/Samples/0_Introduction/simpleStreams/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleStreams/NsightEclipse.xml b/Samples/0_Introduction/simpleStreams/NsightEclipse.xml
index b2b5aff6..d7fb6d38 100644
--- a/Samples/0_Introduction/simpleStreams/NsightEclipse.xml
+++ b/Samples/0_Introduction/simpleStreams/NsightEclipse.xml
@@ -4,25 +4,25 @@
   <name>simpleStreams</name>
   <cuda_api_list>
     <toolkit>cudaMemcpy</toolkit>
-    <toolkit>cudaMemcpyAsync</toolkit>
-    <toolkit>cudaStreamDestroy</toolkit>
-    <toolkit>cudaMallocHost</toolkit>
-    <toolkit>cudaHostAlloc</toolkit>
-    <toolkit>cudaHostRegister</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaEventCreateWithFlags</toolkit>
-    <toolkit>cudaEventDestroy</toolkit>
     <toolkit>cudaSetDeviceFlags</toolkit>
     <toolkit>cudaSetDevice</toolkit>
-    <toolkit>cudaGetDeviceProperties</toolkit>
-    <toolkit>cudaGetDeviceCount</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
-    <toolkit>cudaMemset</toolkit>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
+    <toolkit>cudaEventDestroy</toolkit>
     <toolkit>cudaStreamCreate</toolkit>
-    <toolkit>cudaEventSynchronize</toolkit>
+    <toolkit>cudaMallocHost</toolkit>
+    <toolkit>cudaEventCreateWithFlags</toolkit>
     <toolkit>cudaFreeHost</toolkit>
+    <toolkit>cudaMemcpyAsync</toolkit>
+    <toolkit>cudaGetDeviceCount</toolkit>
+    <toolkit>cudaStreamDestroy</toolkit>
+    <toolkit>cudaMemset</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
+    <toolkit>cudaHostAlloc</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaHostRegister</toolkit>
+    <toolkit>cudaEventSynchronize</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaGetDeviceProperties</toolkit>
     <toolkit>cudaHostUnregister</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample uses CUDA streams to overlap kernel executions with memory copies between the host and a GPU device.  This sample uses a new CUDA 4.0 feature that supports pinning of generic host memory.  Requires Compute Capability 2.0 or higher.]]></description>
@@ -61,6 +61,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/simpleStreams/README.md b/Samples/0_Introduction/simpleStreams/README.md
index 7c33b907..a9de18f8 100644
--- a/Samples/0_Introduction/simpleStreams/README.md
+++ b/Samples/0_Introduction/simpleStreams/README.md
@@ -10,7 +10,7 @@ Asynchronous Data Transfers, CUDA Streams and Events
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemcpy, cudaMemcpyAsync, cudaStreamDestroy, cudaMallocHost, cudaHostAlloc, cudaHostRegister, cudaMalloc, cudaEventCreateWithFlags, cudaEventDestroy, cudaSetDeviceFlags, cudaSetDevice, cudaGetDeviceProperties, cudaGetDeviceCount, cudaEventElapsedTime, cudaMemset, cudaFree, cudaEventRecord, cudaStreamCreate, cudaEventSynchronize, cudaFreeHost, cudaHostUnregister
+cudaMemcpy, cudaSetDeviceFlags, cudaSetDevice, cudaEventDestroy, cudaStreamCreate, cudaMallocHost, cudaEventCreateWithFlags, cudaFreeHost, cudaMemcpyAsync, cudaGetDeviceCount, cudaStreamDestroy, cudaMemset, cudaEventElapsedTime, cudaHostAlloc, cudaFree, cudaHostRegister, cudaEventSynchronize, cudaEventRecord, cudaMalloc, cudaGetDeviceProperties, cudaHostUnregister
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/simpleStreams/simpleStreams_vs2017.vcxproj b/Samples/0_Introduction/simpleStreams/simpleStreams_vs2017.vcxproj
index 1aae760f..b0fc51da 100644
--- a/Samples/0_Introduction/simpleStreams/simpleStreams_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleStreams/simpleStreams_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleStreams.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleStreams/simpleStreams_vs2019.vcxproj b/Samples/0_Introduction/simpleStreams/simpleStreams_vs2019.vcxproj
index f950cfd8..6b96b6b6 100644
--- a/Samples/0_Introduction/simpleStreams/simpleStreams_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleStreams/simpleStreams_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleStreams.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleStreams/simpleStreams_vs2022.vcxproj b/Samples/0_Introduction/simpleStreams/simpleStreams_vs2022.vcxproj
index 58020040..cf0c0e4d 100644
--- a/Samples/0_Introduction/simpleStreams/simpleStreams_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleStreams/simpleStreams_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleStreams.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleSurfaceWrite/Makefile b/Samples/0_Introduction/simpleSurfaceWrite/Makefile
index 024e0d4c..7440eee7 100644
--- a/Samples/0_Introduction/simpleSurfaceWrite/Makefile
+++ b/Samples/0_Introduction/simpleSurfaceWrite/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleSurfaceWrite/NsightEclipse.xml b/Samples/0_Introduction/simpleSurfaceWrite/NsightEclipse.xml
index 182fad40..4d9153cd 100644
--- a/Samples/0_Introduction/simpleSurfaceWrite/NsightEclipse.xml
+++ b/Samples/0_Introduction/simpleSurfaceWrite/NsightEclipse.xml
@@ -6,18 +6,18 @@
     <clean>output.pgm</clean>
   </cleanextras>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaMemcpyToArray</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaCreateChannelDesc</toolkit>
+    <toolkit>cudaMallocArray</toolkit>
     <toolkit>cudaFreeArray</toolkit>
-    <toolkit>cudaDestroySurfaceObject</toolkit>
-    <toolkit>cudaCreateSurfaceObject</toolkit>
+    <toolkit>cudaFree</toolkit>
     <toolkit>cudaDestroyTextureObject</toolkit>
+    <toolkit>cudaMemcpyToArray</toolkit>
+    <toolkit>cudaDestroySurfaceObject</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaCreateSurfaceObject</toolkit>
     <toolkit>cudaCreateTextureObject</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaMallocArray</toolkit>
-    <toolkit>cudaCreateChannelDesc</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[Simple example that demonstrates the use of 2D surface references (Write-to-Texture)]]></description>
@@ -61,6 +61,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/simpleSurfaceWrite/README.md b/Samples/0_Introduction/simpleSurfaceWrite/README.md
index a93a9d6a..944cf2fc 100644
--- a/Samples/0_Introduction/simpleSurfaceWrite/README.md
+++ b/Samples/0_Introduction/simpleSurfaceWrite/README.md
@@ -10,7 +10,7 @@ Texture, Surface Writes, Image Processing
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaMemcpyToArray, cudaFreeArray, cudaDestroySurfaceObject, cudaCreateSurfaceObject, cudaDestroyTextureObject, cudaDeviceSynchronize, cudaCreateTextureObject, cudaMalloc, cudaMallocArray, cudaCreateChannelDesc, cudaMemcpy, cudaGetDeviceProperties
+cudaMemcpy, cudaCreateChannelDesc, cudaMallocArray, cudaFreeArray, cudaFree, cudaDestroyTextureObject, cudaMemcpyToArray, cudaDestroySurfaceObject, cudaDeviceSynchronize, cudaCreateSurfaceObject, cudaCreateTextureObject, cudaMalloc, cudaGetDeviceProperties
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/simpleSurfaceWrite/simpleSurfaceWrite_vs2017.vcxproj b/Samples/0_Introduction/simpleSurfaceWrite/simpleSurfaceWrite_vs2017.vcxproj
index c076e271..8a65d23a 100644
--- a/Samples/0_Introduction/simpleSurfaceWrite/simpleSurfaceWrite_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleSurfaceWrite/simpleSurfaceWrite_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleSurfaceWrite.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleSurfaceWrite/simpleSurfaceWrite_vs2019.vcxproj b/Samples/0_Introduction/simpleSurfaceWrite/simpleSurfaceWrite_vs2019.vcxproj
index c9fdbc59..d61b8689 100644
--- a/Samples/0_Introduction/simpleSurfaceWrite/simpleSurfaceWrite_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleSurfaceWrite/simpleSurfaceWrite_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleSurfaceWrite.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleSurfaceWrite/simpleSurfaceWrite_vs2022.vcxproj b/Samples/0_Introduction/simpleSurfaceWrite/simpleSurfaceWrite_vs2022.vcxproj
index 1b8e29b3..d5275f35 100644
--- a/Samples/0_Introduction/simpleSurfaceWrite/simpleSurfaceWrite_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleSurfaceWrite/simpleSurfaceWrite_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleSurfaceWrite.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleTemplates/Makefile b/Samples/0_Introduction/simpleTemplates/Makefile
index 78ad03d3..beac3d79 100644
--- a/Samples/0_Introduction/simpleTemplates/Makefile
+++ b/Samples/0_Introduction/simpleTemplates/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleTemplates/NsightEclipse.xml b/Samples/0_Introduction/simpleTemplates/NsightEclipse.xml
index 343057ce..36149261 100644
--- a/Samples/0_Introduction/simpleTemplates/NsightEclipse.xml
+++ b/Samples/0_Introduction/simpleTemplates/NsightEclipse.xml
@@ -4,9 +4,9 @@
   <name>simpleTemplates</name>
   <cuda_api_list>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaFree</toolkit>
     <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaFree</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample is a templatized version of the template project. It also shows how to correctly templatize dynamically allocated shared memory arrays.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -42,6 +42,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/simpleTemplates/README.md b/Samples/0_Introduction/simpleTemplates/README.md
index 83a67103..0db67151 100644
--- a/Samples/0_Introduction/simpleTemplates/README.md
+++ b/Samples/0_Introduction/simpleTemplates/README.md
@@ -10,7 +10,7 @@ C++ Templates
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMalloc, cudaFree, cudaMemcpy, cudaGetDeviceProperties
+cudaMalloc, cudaMemcpy, cudaGetDeviceProperties, cudaFree
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/simpleTemplates/simpleTemplates_vs2017.vcxproj b/Samples/0_Introduction/simpleTemplates/simpleTemplates_vs2017.vcxproj
index b78a4c18..96cca985 100644
--- a/Samples/0_Introduction/simpleTemplates/simpleTemplates_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleTemplates/simpleTemplates_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleTemplates.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleTemplates/simpleTemplates_vs2019.vcxproj b/Samples/0_Introduction/simpleTemplates/simpleTemplates_vs2019.vcxproj
index 9c5ec6d8..93969038 100644
--- a/Samples/0_Introduction/simpleTemplates/simpleTemplates_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleTemplates/simpleTemplates_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleTemplates.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleTemplates/simpleTemplates_vs2022.vcxproj b/Samples/0_Introduction/simpleTemplates/simpleTemplates_vs2022.vcxproj
index 0929e9be..90b222be 100644
--- a/Samples/0_Introduction/simpleTemplates/simpleTemplates_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleTemplates/simpleTemplates_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleTemplates.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleTemplates_nvrtc/README.md b/Samples/0_Introduction/simpleTemplates_nvrtc/README.md
index ae309261..31c588ee 100644
--- a/Samples/0_Introduction/simpleTemplates_nvrtc/README.md
+++ b/Samples/0_Introduction/simpleTemplates_nvrtc/README.md
@@ -10,7 +10,7 @@ C++ Templates, Runtime Compilation
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
-cuModuleGetFunction, cuMemAlloc, cuLaunchKernel, cuCtxSynchronize, cuMemFree, cuMemcpyDtoH, cuMemcpyHtoD
+cuMemcpyDtoH, cuLaunchKernel, cuMemcpyHtoD, cuCtxSynchronize, cuMemAlloc, cuMemFree, cuModuleGetFunction
 
 ## Dependencies needed to build/run
 [NVRTC](../../../README.md#nvrtc)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/0_Introduction/simpleTemplates_nvrtc/simpleTemplates_nvrtc_vs2017.vcxproj b/Samples/0_Introduction/simpleTemplates_nvrtc/simpleTemplates_nvrtc_vs2017.vcxproj
index 0c2dfa96..1655b536 100644
--- a/Samples/0_Introduction/simpleTemplates_nvrtc/simpleTemplates_nvrtc_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleTemplates_nvrtc/simpleTemplates_nvrtc_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleTemplates_nvrtc/simpleTemplates_nvrtc_vs2019.vcxproj b/Samples/0_Introduction/simpleTemplates_nvrtc/simpleTemplates_nvrtc_vs2019.vcxproj
index 08f3653a..6d28d7aa 100644
--- a/Samples/0_Introduction/simpleTemplates_nvrtc/simpleTemplates_nvrtc_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleTemplates_nvrtc/simpleTemplates_nvrtc_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleTemplates_nvrtc/simpleTemplates_nvrtc_vs2022.vcxproj b/Samples/0_Introduction/simpleTemplates_nvrtc/simpleTemplates_nvrtc_vs2022.vcxproj
index c139a274..0c8ae969 100644
--- a/Samples/0_Introduction/simpleTemplates_nvrtc/simpleTemplates_nvrtc_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleTemplates_nvrtc/simpleTemplates_nvrtc_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleTexture/Makefile b/Samples/0_Introduction/simpleTexture/Makefile
index 046207e9..e705cef8 100644
--- a/Samples/0_Introduction/simpleTexture/Makefile
+++ b/Samples/0_Introduction/simpleTexture/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleTexture/NsightEclipse.xml b/Samples/0_Introduction/simpleTexture/NsightEclipse.xml
index a4fbab4b..0f029aea 100644
--- a/Samples/0_Introduction/simpleTexture/NsightEclipse.xml
+++ b/Samples/0_Introduction/simpleTexture/NsightEclipse.xml
@@ -6,16 +6,16 @@
     <clean>./data/teapot512_bw_out.pgm</clean>
   </cleanextras>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaMemcpyToArray</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaCreateChannelDesc</toolkit>
+    <toolkit>cudaMallocArray</toolkit>
     <toolkit>cudaFreeArray</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaFree</toolkit>
     <toolkit>cudaDestroyTextureObject</toolkit>
+    <toolkit>cudaMemcpyToArray</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaCreateTextureObject</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaMallocArray</toolkit>
-    <toolkit>cudaCreateChannelDesc</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
   </cuda_api_list>
   <description><![CDATA[Simple example that demonstrates use of Textures in CUDA.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -59,6 +59,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/simpleTexture/README.md b/Samples/0_Introduction/simpleTexture/README.md
index 8577f337..834d4ee8 100644
--- a/Samples/0_Introduction/simpleTexture/README.md
+++ b/Samples/0_Introduction/simpleTexture/README.md
@@ -10,7 +10,7 @@ CUDA Runtime API, Texture, Image Processing
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaMemcpyToArray, cudaFreeArray, cudaDeviceSynchronize, cudaDestroyTextureObject, cudaCreateTextureObject, cudaMalloc, cudaMallocArray, cudaCreateChannelDesc, cudaMemcpy
+cudaMemcpy, cudaCreateChannelDesc, cudaMallocArray, cudaFreeArray, cudaFree, cudaDestroyTextureObject, cudaMemcpyToArray, cudaDeviceSynchronize, cudaCreateTextureObject, cudaMalloc
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/simpleTexture/simpleTexture_vs2017.vcxproj b/Samples/0_Introduction/simpleTexture/simpleTexture_vs2017.vcxproj
index 544d7b22..c12f6f17 100644
--- a/Samples/0_Introduction/simpleTexture/simpleTexture_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleTexture/simpleTexture_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleTexture.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleTexture/simpleTexture_vs2019.vcxproj b/Samples/0_Introduction/simpleTexture/simpleTexture_vs2019.vcxproj
index 034f2890..6c76b4ab 100644
--- a/Samples/0_Introduction/simpleTexture/simpleTexture_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleTexture/simpleTexture_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleTexture.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleTexture/simpleTexture_vs2022.vcxproj b/Samples/0_Introduction/simpleTexture/simpleTexture_vs2022.vcxproj
index 04f2a44d..5562ce0b 100644
--- a/Samples/0_Introduction/simpleTexture/simpleTexture_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleTexture/simpleTexture_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleTexture.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleTexture3D/Makefile b/Samples/0_Introduction/simpleTexture3D/Makefile
index fa6da461..f232cfff 100644
--- a/Samples/0_Introduction/simpleTexture3D/Makefile
+++ b/Samples/0_Introduction/simpleTexture3D/Makefile
@@ -299,9 +299,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleTexture3D/NsightEclipse.xml b/Samples/0_Introduction/simpleTexture3D/NsightEclipse.xml
index de66d67b..56848594 100644
--- a/Samples/0_Introduction/simpleTexture3D/NsightEclipse.xml
+++ b/Samples/0_Introduction/simpleTexture3D/NsightEclipse.xml
@@ -3,20 +3,20 @@
 <entry>
   <name>simpleTexture3D</name>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaGraphicsMapResources</toolkit>
-    <toolkit>cudaFreeArray</toolkit>
-    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
-    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
-    <toolkit>cudaExtent</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaDestroyTextureObject</toolkit>
-    <toolkit>cudaPitchedPtr</toolkit>
-    <toolkit>cudaCreateTextureObject</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaGraphicsUnregisterResource</toolkit>
     <toolkit>cudaGraphicsUnmapResources</toolkit>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaFreeArray</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaPitchedPtr</toolkit>
+    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
+    <toolkit>cudaGraphicsMapResources</toolkit>
+    <toolkit>cudaDestroyTextureObject</toolkit>
+    <toolkit>cudaExtent</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaCreateTextureObject</toolkit>
+    <toolkit>cudaGraphicsUnregisterResource</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
   </cuda_api_list>
   <description><![CDATA[Simple example that demonstrates use of 3D Textures in CUDA.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -79,6 +79,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/simpleTexture3D/README.md b/Samples/0_Introduction/simpleTexture3D/README.md
index 7ea427e7..de889b8b 100644
--- a/Samples/0_Introduction/simpleTexture3D/README.md
+++ b/Samples/0_Introduction/simpleTexture3D/README.md
@@ -10,7 +10,7 @@ Graphics Interop, Image Processing, 3D Textures, Surface Writes
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaGraphicsMapResources, cudaFreeArray, cudaGraphicsGLRegisterBuffer, cudaGraphicsResourceGetMappedPointer, cudaExtent, cudaDeviceSynchronize, cudaDestroyTextureObject, cudaPitchedPtr, cudaCreateTextureObject, cudaMalloc, cudaGraphicsUnregisterResource, cudaGraphicsUnmapResources, cudaMemcpy
+cudaGraphicsUnmapResources, cudaMemcpy, cudaFreeArray, cudaFree, cudaPitchedPtr, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDestroyTextureObject, cudaExtent, cudaDeviceSynchronize, cudaCreateTextureObject, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer
 
 ## Dependencies needed to build/run
 [X11](../../../README.md#x11), [GL](../../../README.md#gl)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/0_Introduction/simpleTexture3D/findgllib.mk b/Samples/0_Introduction/simpleTexture3D/findgllib.mk
index f0a5c551..998fcf0f 100644
--- a/Samples/0_Introduction/simpleTexture3D/findgllib.mk
+++ b/Samples/0_Introduction/simpleTexture3D/findgllib.mk
@@ -53,11 +53,12 @@ endif
 ifeq ("$(TARGET_OS)","linux")
     # $(info) >> findgllib.mk -> LINUX path <<<)
     # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside
-    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu      >/dev/null 2>&1; echo $$?)
-    FEDORA = $(shell echo $(DISTRO) | grep -i fedora      >/dev/null 2>&1; echo $$?)
-    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
-    CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
+    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu       >/dev/null 2>&1; echo $$?)
+    FEDORA = $(shell echo $(DISTRO) | grep -i fedora       >/dev/null 2>&1; echo $$?)
+    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel'  >/dev/null 2>&1; echo $$?)
+    CENTOS = $(shell echo $(DISTRO) | grep -i centos       >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux")
         DFLT_PATH ?= /usr/lib
       endif
     endif
+
     ifeq ("$(SUSE)","0")
       GLPATH    ?= /usr/X11R6/lib64
       GLLINK    ?= -L/usr/X11R6/lib64
       DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(FEDORA)","0")
+    else 
       GLPATH    ?= /usr/lib64/nvidia
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
-    ifeq ("$(RHEL)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(CENTOS)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-  
+      
   # find libGL, libGLU 
   GLLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so  -print 2>/dev/null)
   GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null)
diff --git a/Samples/0_Introduction/simpleTexture3D/simpleTexture3D_vs2017.vcxproj b/Samples/0_Introduction/simpleTexture3D/simpleTexture3D_vs2017.vcxproj
index 9fefb601..ed90a63b 100644
--- a/Samples/0_Introduction/simpleTexture3D/simpleTexture3D_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleTexture3D/simpleTexture3D_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleTexture3D.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -118,6 +118,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleTexture3D/simpleTexture3D_vs2019.vcxproj b/Samples/0_Introduction/simpleTexture3D/simpleTexture3D_vs2019.vcxproj
index 23518ada..be0fa981 100644
--- a/Samples/0_Introduction/simpleTexture3D/simpleTexture3D_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleTexture3D/simpleTexture3D_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleTexture3D.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -114,6 +114,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleTexture3D/simpleTexture3D_vs2022.vcxproj b/Samples/0_Introduction/simpleTexture3D/simpleTexture3D_vs2022.vcxproj
index dd4ab2af..1dd427b0 100644
--- a/Samples/0_Introduction/simpleTexture3D/simpleTexture3D_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleTexture3D/simpleTexture3D_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleTexture3D.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -114,6 +114,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleTextureDrv/Makefile b/Samples/0_Introduction/simpleTextureDrv/Makefile
index bca14838..95ff9ffe 100644
--- a/Samples/0_Introduction/simpleTextureDrv/Makefile
+++ b/Samples/0_Introduction/simpleTextureDrv/Makefile
@@ -283,9 +283,9 @@ FATBIN_FILE := simpleTexture_kernel${TARGET_SIZE}.fatbin
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(GENCODE_FLAGS),)
diff --git a/Samples/0_Introduction/simpleTextureDrv/README.md b/Samples/0_Introduction/simpleTextureDrv/README.md
index 3457bfbe..ee28ee7f 100644
--- a/Samples/0_Introduction/simpleTextureDrv/README.md
+++ b/Samples/0_Introduction/simpleTextureDrv/README.md
@@ -10,7 +10,7 @@ CUDA Driver API, Texture, Image Processing
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
-cuModuleGetFunction, cuTexObjectDestroy, cuModuleLoadData, cuCtxCreate, cuArrayCreate, cuDeviceGetName, cuLaunchKernel, cuMemAlloc, cuCtxSynchronize, cuArrayDestroy, cuTexObjectCreate, cuMemFree, cuMemcpyDtoH, cuCtxDestroy, cuDeviceGetAttribute
+cuMemcpyDtoH, cuLaunchKernel, cuModuleLoadData, cuDeviceGetName, cuDeviceGetAttribute, cuCtxSynchronize, cuMemAlloc, cuArrayCreate, cuMemFree, cuCtxDestroy, cuTexObjectDestroy, cuTexObjectCreate, cuCtxCreate, cuModuleGetFunction, cuArrayDestroy
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/simpleTextureDrv/simpleTextureDrv_vs2017.vcxproj b/Samples/0_Introduction/simpleTextureDrv/simpleTextureDrv_vs2017.vcxproj
index d674efc6..66dc7b33 100644
--- a/Samples/0_Introduction/simpleTextureDrv/simpleTextureDrv_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleTextureDrv/simpleTextureDrv_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleTextureDrv.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -111,6 +111,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleTextureDrv/simpleTextureDrv_vs2019.vcxproj b/Samples/0_Introduction/simpleTextureDrv/simpleTextureDrv_vs2019.vcxproj
index 5817eb47..e1c43716 100644
--- a/Samples/0_Introduction/simpleTextureDrv/simpleTextureDrv_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleTextureDrv/simpleTextureDrv_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleTextureDrv.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleTextureDrv/simpleTextureDrv_vs2022.vcxproj b/Samples/0_Introduction/simpleTextureDrv/simpleTextureDrv_vs2022.vcxproj
index 0cbde7a7..21cf1f35 100644
--- a/Samples/0_Introduction/simpleTextureDrv/simpleTextureDrv_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleTextureDrv/simpleTextureDrv_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleTextureDrv.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleVoteIntrinsics/Makefile b/Samples/0_Introduction/simpleVoteIntrinsics/Makefile
index 82e01279..32edcf72 100644
--- a/Samples/0_Introduction/simpleVoteIntrinsics/Makefile
+++ b/Samples/0_Introduction/simpleVoteIntrinsics/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleVoteIntrinsics/NsightEclipse.xml b/Samples/0_Introduction/simpleVoteIntrinsics/NsightEclipse.xml
index b424859f..e91b9714 100644
--- a/Samples/0_Introduction/simpleVoteIntrinsics/NsightEclipse.xml
+++ b/Samples/0_Introduction/simpleVoteIntrinsics/NsightEclipse.xml
@@ -3,10 +3,10 @@
 <entry>
   <name>simpleVoteIntrinsics</name>
   <cuda_api_list>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaFree</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[Simple program which demonstrates how to use the Vote (__any_sync, __all_sync) intrinsic instruction in a CUDA kernel.]]></description>
@@ -46,6 +46,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/simpleVoteIntrinsics/README.md b/Samples/0_Introduction/simpleVoteIntrinsics/README.md
index e35dece3..9c86c635 100644
--- a/Samples/0_Introduction/simpleVoteIntrinsics/README.md
+++ b/Samples/0_Introduction/simpleVoteIntrinsics/README.md
@@ -10,7 +10,7 @@ Vote Intrinsics
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaMemcpy, cudaGetDeviceProperties
+cudaMemcpy, cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaGetDeviceProperties
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/simpleVoteIntrinsics/simpleVoteIntrinsics_vs2017.vcxproj b/Samples/0_Introduction/simpleVoteIntrinsics/simpleVoteIntrinsics_vs2017.vcxproj
index c49388da..d9045a58 100644
--- a/Samples/0_Introduction/simpleVoteIntrinsics/simpleVoteIntrinsics_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleVoteIntrinsics/simpleVoteIntrinsics_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleVoteIntrinsics.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleVoteIntrinsics/simpleVoteIntrinsics_vs2019.vcxproj b/Samples/0_Introduction/simpleVoteIntrinsics/simpleVoteIntrinsics_vs2019.vcxproj
index 91da2af0..3bcc1472 100644
--- a/Samples/0_Introduction/simpleVoteIntrinsics/simpleVoteIntrinsics_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleVoteIntrinsics/simpleVoteIntrinsics_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleVoteIntrinsics.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleVoteIntrinsics/simpleVoteIntrinsics_vs2022.vcxproj b/Samples/0_Introduction/simpleVoteIntrinsics/simpleVoteIntrinsics_vs2022.vcxproj
index 980c93b2..3af7fc24 100644
--- a/Samples/0_Introduction/simpleVoteIntrinsics/simpleVoteIntrinsics_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleVoteIntrinsics/simpleVoteIntrinsics_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleVoteIntrinsics.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleVoteIntrinsics_nvrtc/README.md b/Samples/0_Introduction/simpleVoteIntrinsics_nvrtc/README.md
index 28801cbd..2f4cdeb4 100644
--- a/Samples/0_Introduction/simpleVoteIntrinsics_nvrtc/README.md
+++ b/Samples/0_Introduction/simpleVoteIntrinsics_nvrtc/README.md
@@ -10,7 +10,7 @@ Vote Intrinsics, CUDA Driver API, Runtime Compilation
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
-cuModuleGetFunction, cuMemAlloc, cuLaunchKernel, cuCtxSynchronize, cuMemFree, cuMemcpyDtoH, cuMemcpyHtoD
+cuMemcpyDtoH, cuLaunchKernel, cuMemcpyHtoD, cuCtxSynchronize, cuMemAlloc, cuMemFree, cuModuleGetFunction
 
 ## Dependencies needed to build/run
 [NVRTC](../../../README.md#nvrtc)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/0_Introduction/simpleVoteIntrinsics_nvrtc/simpleVoteIntrinsics_nvrtc_vs2017.vcxproj b/Samples/0_Introduction/simpleVoteIntrinsics_nvrtc/simpleVoteIntrinsics_nvrtc_vs2017.vcxproj
index a0d07492..0d541d12 100644
--- a/Samples/0_Introduction/simpleVoteIntrinsics_nvrtc/simpleVoteIntrinsics_nvrtc_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleVoteIntrinsics_nvrtc/simpleVoteIntrinsics_nvrtc_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleVoteIntrinsics_nvrtc/simpleVoteIntrinsics_nvrtc_vs2019.vcxproj b/Samples/0_Introduction/simpleVoteIntrinsics_nvrtc/simpleVoteIntrinsics_nvrtc_vs2019.vcxproj
index 00d7d275..2c334e59 100644
--- a/Samples/0_Introduction/simpleVoteIntrinsics_nvrtc/simpleVoteIntrinsics_nvrtc_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleVoteIntrinsics_nvrtc/simpleVoteIntrinsics_nvrtc_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleVoteIntrinsics_nvrtc/simpleVoteIntrinsics_nvrtc_vs2022.vcxproj b/Samples/0_Introduction/simpleVoteIntrinsics_nvrtc/simpleVoteIntrinsics_nvrtc_vs2022.vcxproj
index c8c5ff0b..69dbd968 100644
--- a/Samples/0_Introduction/simpleVoteIntrinsics_nvrtc/simpleVoteIntrinsics_nvrtc_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleVoteIntrinsics_nvrtc/simpleVoteIntrinsics_nvrtc_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleZeroCopy/Makefile b/Samples/0_Introduction/simpleZeroCopy/Makefile
index 8fe33e69..3161f90b 100644
--- a/Samples/0_Introduction/simpleZeroCopy/Makefile
+++ b/Samples/0_Introduction/simpleZeroCopy/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/simpleZeroCopy/NsightEclipse.xml b/Samples/0_Introduction/simpleZeroCopy/NsightEclipse.xml
index 155731c7..a5776e59 100644
--- a/Samples/0_Introduction/simpleZeroCopy/NsightEclipse.xml
+++ b/Samples/0_Introduction/simpleZeroCopy/NsightEclipse.xml
@@ -4,15 +4,15 @@
   <name>simpleZeroCopy</name>
   <cuda_api_list>
     <toolkit>cudaHostAlloc</toolkit>
-    <toolkit>cudaGetDeviceCount</toolkit>
+    <toolkit>cudaSetDeviceFlags</toolkit>
     <toolkit>cudaHostRegister</toolkit>
+    <toolkit>cudaSetDevice</toolkit>
+    <toolkit>cudaGetDeviceCount</toolkit>
+    <toolkit>cudaHostGetDevicePointer</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaFreeHost</toolkit>
-    <toolkit>cudaHostUnregister</toolkit>
-    <toolkit>cudaSetDeviceFlags</toolkit>
-    <toolkit>cudaSetDevice</toolkit>
-    <toolkit>cudaHostGetDevicePointer</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaHostUnregister</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample illustrates how to use Zero MemCopy, kernels can read and write directly to pinned system memory.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -51,6 +51,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/simpleZeroCopy/README.md b/Samples/0_Introduction/simpleZeroCopy/README.md
index 640a3096..a2f5acc8 100644
--- a/Samples/0_Introduction/simpleZeroCopy/README.md
+++ b/Samples/0_Introduction/simpleZeroCopy/README.md
@@ -10,7 +10,7 @@ Performance Strategies, Pinned System Paged Memory, Vector Addition
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaHostAlloc, cudaGetDeviceCount, cudaHostRegister, cudaDeviceSynchronize, cudaFreeHost, cudaHostUnregister, cudaSetDeviceFlags, cudaSetDevice, cudaHostGetDevicePointer, cudaGetDeviceProperties
+cudaHostAlloc, cudaSetDeviceFlags, cudaHostRegister, cudaSetDevice, cudaGetDeviceCount, cudaHostGetDevicePointer, cudaDeviceSynchronize, cudaFreeHost, cudaGetDeviceProperties, cudaHostUnregister
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/simpleZeroCopy/simpleZeroCopy_vs2017.vcxproj b/Samples/0_Introduction/simpleZeroCopy/simpleZeroCopy_vs2017.vcxproj
index 46bb04d3..b7e0b9c0 100644
--- a/Samples/0_Introduction/simpleZeroCopy/simpleZeroCopy_vs2017.vcxproj
+++ b/Samples/0_Introduction/simpleZeroCopy/simpleZeroCopy_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleZeroCopy.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleZeroCopy/simpleZeroCopy_vs2019.vcxproj b/Samples/0_Introduction/simpleZeroCopy/simpleZeroCopy_vs2019.vcxproj
index 17ea198b..c7a9daed 100644
--- a/Samples/0_Introduction/simpleZeroCopy/simpleZeroCopy_vs2019.vcxproj
+++ b/Samples/0_Introduction/simpleZeroCopy/simpleZeroCopy_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleZeroCopy.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/simpleZeroCopy/simpleZeroCopy_vs2022.vcxproj b/Samples/0_Introduction/simpleZeroCopy/simpleZeroCopy_vs2022.vcxproj
index 6f4d0c20..29709586 100644
--- a/Samples/0_Introduction/simpleZeroCopy/simpleZeroCopy_vs2022.vcxproj
+++ b/Samples/0_Introduction/simpleZeroCopy/simpleZeroCopy_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleZeroCopy.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/systemWideAtomics/Makefile b/Samples/0_Introduction/systemWideAtomics/Makefile
index e8019879..6832e615 100644
--- a/Samples/0_Introduction/systemWideAtomics/Makefile
+++ b/Samples/0_Introduction/systemWideAtomics/Makefile
@@ -303,9 +303,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 61 70 72 75 80 86 87
+SMS ?= 61 70 72 75 80 86 87 90
 else
-SMS ?= 60 61 70 75 80 86
+SMS ?= 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/systemWideAtomics/NsightEclipse.xml b/Samples/0_Introduction/systemWideAtomics/NsightEclipse.xml
index f9099627..05284552 100644
--- a/Samples/0_Introduction/systemWideAtomics/NsightEclipse.xml
+++ b/Samples/0_Introduction/systemWideAtomics/NsightEclipse.xml
@@ -3,10 +3,10 @@
 <entry>
   <name>systemWideAtomics</name>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaMallocManaged</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaFree</toolkit>
   </cuda_api_list>
   <description><![CDATA[A simple demonstration of system wide atomic instructions.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -44,6 +44,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/systemWideAtomics/README.md b/Samples/0_Introduction/systemWideAtomics/README.md
index c838fcd5..98f2a062 100644
--- a/Samples/0_Introduction/systemWideAtomics/README.md
+++ b/Samples/0_Introduction/systemWideAtomics/README.md
@@ -10,7 +10,7 @@ Atomic Intrinsics, Unified Memory
 
 ## Supported SM Architectures
 
-[SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaDeviceSynchronize, cudaMallocManaged, cudaGetDeviceProperties
+cudaDeviceSynchronize, cudaMallocManaged, cudaGetDeviceProperties, cudaFree
 
 ## Dependencies needed to build/run
 [UVM](../../../README.md#uvm)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/0_Introduction/template/Makefile b/Samples/0_Introduction/template/Makefile
index a462a637..47f37966 100644
--- a/Samples/0_Introduction/template/Makefile
+++ b/Samples/0_Introduction/template/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/template/NsightEclipse.xml b/Samples/0_Introduction/template/NsightEclipse.xml
index e043d389..21fe74fb 100644
--- a/Samples/0_Introduction/template/NsightEclipse.xml
+++ b/Samples/0_Introduction/template/NsightEclipse.xml
@@ -4,8 +4,8 @@
   <name>template</name>
   <cuda_api_list>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaFree</toolkit>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaFree</toolkit>
   </cuda_api_list>
   <description><![CDATA[A trivial template project that can be used as a starting point to create new CUDA projects.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -41,6 +41,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/template/README.md b/Samples/0_Introduction/template/README.md
index afcf51e1..f2244569 100644
--- a/Samples/0_Introduction/template/README.md
+++ b/Samples/0_Introduction/template/README.md
@@ -10,7 +10,7 @@ Device Memory Allocation
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMalloc, cudaFree, cudaMemcpy
+cudaMalloc, cudaMemcpy, cudaFree
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/template/template_vs2017.vcxproj b/Samples/0_Introduction/template/template_vs2017.vcxproj
index 01694a19..5e436781 100644
--- a/Samples/0_Introduction/template/template_vs2017.vcxproj
+++ b/Samples/0_Introduction/template/template_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/template.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/template/template_vs2019.vcxproj b/Samples/0_Introduction/template/template_vs2019.vcxproj
index 606bb330..f736fc28 100644
--- a/Samples/0_Introduction/template/template_vs2019.vcxproj
+++ b/Samples/0_Introduction/template/template_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/template.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/template/template_vs2022.vcxproj b/Samples/0_Introduction/template/template_vs2022.vcxproj
index cc552ab4..6a6d8744 100644
--- a/Samples/0_Introduction/template/template_vs2022.vcxproj
+++ b/Samples/0_Introduction/template/template_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/template.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/vectorAdd/Makefile b/Samples/0_Introduction/vectorAdd/Makefile
index 3ecf5e49..62be1498 100644
--- a/Samples/0_Introduction/vectorAdd/Makefile
+++ b/Samples/0_Introduction/vectorAdd/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/0_Introduction/vectorAdd/NsightEclipse.xml b/Samples/0_Introduction/vectorAdd/NsightEclipse.xml
index 0df66051..353acee5 100644
--- a/Samples/0_Introduction/vectorAdd/NsightEclipse.xml
+++ b/Samples/0_Introduction/vectorAdd/NsightEclipse.xml
@@ -3,11 +3,11 @@
 <entry>
   <name>vectorAdd</name>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaGetLastError</toolkit>
     <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaGetErrorString</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaGetLastError</toolkit>
+    <toolkit>cudaMalloc</toolkit>
   </cuda_api_list>
   <description><![CDATA[This CUDA Runtime API sample is a very basic sample that implements element by element vector addition. It is the same as the sample illustrating Chapter 3 of the programming guide with some additions like error checking.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -46,6 +46,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/0_Introduction/vectorAdd/README.md b/Samples/0_Introduction/vectorAdd/README.md
index 8a7e4464..99523169 100644
--- a/Samples/0_Introduction/vectorAdd/README.md
+++ b/Samples/0_Introduction/vectorAdd/README.md
@@ -10,7 +10,7 @@ CUDA Runtime API, Vector Addition
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaMalloc, cudaGetLastError, cudaMemcpy, cudaGetErrorString
+cudaMemcpy, cudaGetErrorString, cudaFree, cudaGetLastError, cudaMalloc
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/vectorAdd/vectorAdd_vs2017.vcxproj b/Samples/0_Introduction/vectorAdd/vectorAdd_vs2017.vcxproj
index 6529e270..e8af3149 100644
--- a/Samples/0_Introduction/vectorAdd/vectorAdd_vs2017.vcxproj
+++ b/Samples/0_Introduction/vectorAdd/vectorAdd_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/vectorAdd.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/vectorAdd/vectorAdd_vs2019.vcxproj b/Samples/0_Introduction/vectorAdd/vectorAdd_vs2019.vcxproj
index 295f1945..a25492e3 100644
--- a/Samples/0_Introduction/vectorAdd/vectorAdd_vs2019.vcxproj
+++ b/Samples/0_Introduction/vectorAdd/vectorAdd_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/vectorAdd.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/vectorAdd/vectorAdd_vs2022.vcxproj b/Samples/0_Introduction/vectorAdd/vectorAdd_vs2022.vcxproj
index 4da53146..c983b88b 100644
--- a/Samples/0_Introduction/vectorAdd/vectorAdd_vs2022.vcxproj
+++ b/Samples/0_Introduction/vectorAdd/vectorAdd_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/vectorAdd.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/vectorAddDrv/Makefile b/Samples/0_Introduction/vectorAddDrv/Makefile
index c21fa942..472417b1 100644
--- a/Samples/0_Introduction/vectorAddDrv/Makefile
+++ b/Samples/0_Introduction/vectorAddDrv/Makefile
@@ -283,9 +283,9 @@ FATBIN_FILE := vectorAdd_kernel${TARGET_SIZE}.fatbin
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(GENCODE_FLAGS),)
diff --git a/Samples/0_Introduction/vectorAddDrv/README.md b/Samples/0_Introduction/vectorAddDrv/README.md
index 09612d23..ac26085a 100644
--- a/Samples/0_Introduction/vectorAddDrv/README.md
+++ b/Samples/0_Introduction/vectorAddDrv/README.md
@@ -10,7 +10,7 @@ CUDA Driver API, Vector Addition
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
-cuModuleGetFunction, cuModuleLoadData, cuCtxCreate, cuLaunchKernel, cuMemAlloc, cuMemcpyDtoH, cuCtxSynchronize, cuMemFree, cuInit, cuCtxDestroy, cuMemcpyHtoD
+cuMemcpyDtoH, cuLaunchKernel, cuMemcpyHtoD, cuModuleLoadData, cuCtxSynchronize, cuMemAlloc, cuMemFree, cuCtxDestroy, cuModuleGetFunction, cuCtxCreate, cuInit
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/vectorAddDrv/vectorAddDrv_vs2017.vcxproj b/Samples/0_Introduction/vectorAddDrv/vectorAddDrv_vs2017.vcxproj
index bfd45966..57b06693 100644
--- a/Samples/0_Introduction/vectorAddDrv/vectorAddDrv_vs2017.vcxproj
+++ b/Samples/0_Introduction/vectorAddDrv/vectorAddDrv_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/vectorAddDrv.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -111,6 +111,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/vectorAddDrv/vectorAddDrv_vs2019.vcxproj b/Samples/0_Introduction/vectorAddDrv/vectorAddDrv_vs2019.vcxproj
index d1119c07..c48b9e14 100644
--- a/Samples/0_Introduction/vectorAddDrv/vectorAddDrv_vs2019.vcxproj
+++ b/Samples/0_Introduction/vectorAddDrv/vectorAddDrv_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/vectorAddDrv.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/vectorAddDrv/vectorAddDrv_vs2022.vcxproj b/Samples/0_Introduction/vectorAddDrv/vectorAddDrv_vs2022.vcxproj
index 49a0a5fb..4f04109b 100644
--- a/Samples/0_Introduction/vectorAddDrv/vectorAddDrv_vs2022.vcxproj
+++ b/Samples/0_Introduction/vectorAddDrv/vectorAddDrv_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/vectorAddDrv.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/vectorAddMMAP/Makefile b/Samples/0_Introduction/vectorAddMMAP/Makefile
index 72b2eaa4..743d2bdd 100644
--- a/Samples/0_Introduction/vectorAddMMAP/Makefile
+++ b/Samples/0_Introduction/vectorAddMMAP/Makefile
@@ -307,9 +307,9 @@ FATBIN_FILE := vectorAdd_kernel${TARGET_SIZE}.fatbin
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(GENCODE_FLAGS),)
diff --git a/Samples/0_Introduction/vectorAddMMAP/README.md b/Samples/0_Introduction/vectorAddMMAP/README.md
index 786e136f..6dbbcae7 100644
--- a/Samples/0_Introduction/vectorAddMMAP/README.md
+++ b/Samples/0_Introduction/vectorAddMMAP/README.md
@@ -10,7 +10,7 @@ CUDA Driver API, Vector Addition, MMAP
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le
 ## CUDA APIs involved
 
 ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
-cuMemSetAccess, cuInit, cuMemAddressReserve, cuModuleGetFunction, cuCtxDestroy, cuCtxCreate, cuMemAddressFree, cuMemGetAllocationGranularity, cuMemUnmap, cuDeviceGetAttribute, cuMemRelease, cuModuleLoadData, cuMemMap, cuMemCreate, cuMemcpyHtoD, cuDeviceCanAccessPeer, cuDeviceGetCount, cuLaunchKernel, cuMemcpyDtoH
+cuMemcpyDtoH, cuDeviceCanAccessPeer, cuModuleGetFunction, cuMemSetAccess, cuMemRelease, cuInit, cuMemcpyHtoD, cuLaunchKernel, cuMemCreate, cuModuleLoadData, cuCtxDestroy, cuDeviceGetCount, cuMemMap, cuDeviceGetAttribute, cuMemGetAllocationGranularity, cuMemAddressFree, cuMemUnmap, cuCtxCreate, cuMemAddressReserve
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/0_Introduction/vectorAddMMAP/vectorAddMMAP_vs2017.vcxproj b/Samples/0_Introduction/vectorAddMMAP/vectorAddMMAP_vs2017.vcxproj
index 6aa6aa9e..00641d9b 100644
--- a/Samples/0_Introduction/vectorAddMMAP/vectorAddMMAP_vs2017.vcxproj
+++ b/Samples/0_Introduction/vectorAddMMAP/vectorAddMMAP_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/vectorAddMMAP.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -112,6 +112,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/vectorAddMMAP/vectorAddMMAP_vs2019.vcxproj b/Samples/0_Introduction/vectorAddMMAP/vectorAddMMAP_vs2019.vcxproj
index ece800ca..a4885080 100644
--- a/Samples/0_Introduction/vectorAddMMAP/vectorAddMMAP_vs2019.vcxproj
+++ b/Samples/0_Introduction/vectorAddMMAP/vectorAddMMAP_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/vectorAddMMAP.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/vectorAddMMAP/vectorAddMMAP_vs2022.vcxproj b/Samples/0_Introduction/vectorAddMMAP/vectorAddMMAP_vs2022.vcxproj
index 81fe95b6..59ad3c82 100644
--- a/Samples/0_Introduction/vectorAddMMAP/vectorAddMMAP_vs2022.vcxproj
+++ b/Samples/0_Introduction/vectorAddMMAP/vectorAddMMAP_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/vectorAddMMAP.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/vectorAdd_nvrtc/README.md b/Samples/0_Introduction/vectorAdd_nvrtc/README.md
index 5e83b512..03ed2a74 100644
--- a/Samples/0_Introduction/vectorAdd_nvrtc/README.md
+++ b/Samples/0_Introduction/vectorAdd_nvrtc/README.md
@@ -10,7 +10,7 @@ CUDA Driver API, Vector Addition, Runtime Compilation
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,7 +23,7 @@ x86_64, ppc64le, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
-cuModuleGetFunction, cuMemAlloc, cuLaunchKernel, cuCtxSynchronize, cuMemFree, cuMemcpyDtoH, cuMemcpyHtoD
+cuMemcpyDtoH, cuLaunchKernel, cuMemcpyHtoD, cuCtxSynchronize, cuMemAlloc, cuMemFree, cuModuleGetFunction
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
 cudaBlockSize, cudaGridSize
@@ -33,7 +33,7 @@ cudaBlockSize, cudaGridSize
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/0_Introduction/vectorAdd_nvrtc/vectorAdd_nvrtc_vs2017.vcxproj b/Samples/0_Introduction/vectorAdd_nvrtc/vectorAdd_nvrtc_vs2017.vcxproj
index c8331245..1ad04a6d 100644
--- a/Samples/0_Introduction/vectorAdd_nvrtc/vectorAdd_nvrtc_vs2017.vcxproj
+++ b/Samples/0_Introduction/vectorAdd_nvrtc/vectorAdd_nvrtc_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/vectorAdd_nvrtc/vectorAdd_nvrtc_vs2019.vcxproj b/Samples/0_Introduction/vectorAdd_nvrtc/vectorAdd_nvrtc_vs2019.vcxproj
index 27cd03de..e140f5eb 100644
--- a/Samples/0_Introduction/vectorAdd_nvrtc/vectorAdd_nvrtc_vs2019.vcxproj
+++ b/Samples/0_Introduction/vectorAdd_nvrtc/vectorAdd_nvrtc_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/0_Introduction/vectorAdd_nvrtc/vectorAdd_nvrtc_vs2022.vcxproj b/Samples/0_Introduction/vectorAdd_nvrtc/vectorAdd_nvrtc_vs2022.vcxproj
index db61b475..a575fb82 100644
--- a/Samples/0_Introduction/vectorAdd_nvrtc/vectorAdd_nvrtc_vs2022.vcxproj
+++ b/Samples/0_Introduction/vectorAdd_nvrtc/vectorAdd_nvrtc_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/1_Utilities/bandwidthTest/Makefile b/Samples/1_Utilities/bandwidthTest/Makefile
index 31f60de5..8699a8bc 100644
--- a/Samples/1_Utilities/bandwidthTest/Makefile
+++ b/Samples/1_Utilities/bandwidthTest/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/1_Utilities/bandwidthTest/NsightEclipse.xml b/Samples/1_Utilities/bandwidthTest/NsightEclipse.xml
index 840ab273..6078a765 100644
--- a/Samples/1_Utilities/bandwidthTest/NsightEclipse.xml
+++ b/Samples/1_Utilities/bandwidthTest/NsightEclipse.xml
@@ -3,22 +3,22 @@
 <entry>
   <name>bandwidthTest</name>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
-    <toolkit>cudaMallocHost</toolkit>
     <toolkit>cudaHostAlloc</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaGetDeviceCount</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaFreeHost</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaEventDestroy</toolkit>
-    <toolkit>cudaSetDevice</toolkit>
-    <toolkit>cudaMemcpyAsync</toolkit>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaMemcpyAsync</toolkit>
+    <toolkit>cudaFree</toolkit>
     <toolkit>cudaGetErrorString</toolkit>
+    <toolkit>cudaMallocHost</toolkit>
+    <toolkit>cudaSetDevice</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
+    <toolkit>cudaFreeHost</toolkit>
+    <toolkit>cudaEventDestroy</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
+    <toolkit>cudaGetDeviceCount</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[This is a simple test program to measure the memcopy bandwidth of the GPU and memcpy bandwidth across PCI-e. This test application is capable of measuring device to device copy bandwidth, host to device copy bandwidth for pageable and page-locked memory, and device to host copy bandwidth for pageable and page-locked memory.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -58,6 +58,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/1_Utilities/bandwidthTest/README.md b/Samples/1_Utilities/bandwidthTest/README.md
index 32e4f779..2cf0bad7 100644
--- a/Samples/1_Utilities/bandwidthTest/README.md
+++ b/Samples/1_Utilities/bandwidthTest/README.md
@@ -10,7 +10,7 @@ CUDA Streams and Events, Performance Strategies
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaEventRecord, cudaMallocHost, cudaHostAlloc, cudaEventCreate, cudaGetDeviceCount, cudaEventElapsedTime, cudaDeviceSynchronize, cudaFreeHost, cudaMalloc, cudaEventDestroy, cudaSetDevice, cudaMemcpyAsync, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties
+cudaHostAlloc, cudaMemcpy, cudaMalloc, cudaMemcpyAsync, cudaFree, cudaGetErrorString, cudaMallocHost, cudaSetDevice, cudaGetDeviceProperties, cudaDeviceSynchronize, cudaEventRecord, cudaFreeHost, cudaEventDestroy, cudaEventElapsedTime, cudaGetDeviceCount, cudaEventCreate
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/1_Utilities/bandwidthTest/bandwidthTest_vs2017.vcxproj b/Samples/1_Utilities/bandwidthTest/bandwidthTest_vs2017.vcxproj
index 38f1b77f..ad862bff 100644
--- a/Samples/1_Utilities/bandwidthTest/bandwidthTest_vs2017.vcxproj
+++ b/Samples/1_Utilities/bandwidthTest/bandwidthTest_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/bandwidthTest.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/1_Utilities/bandwidthTest/bandwidthTest_vs2019.vcxproj b/Samples/1_Utilities/bandwidthTest/bandwidthTest_vs2019.vcxproj
index 59300fc1..e6f5f304 100644
--- a/Samples/1_Utilities/bandwidthTest/bandwidthTest_vs2019.vcxproj
+++ b/Samples/1_Utilities/bandwidthTest/bandwidthTest_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/bandwidthTest.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/1_Utilities/bandwidthTest/bandwidthTest_vs2022.vcxproj b/Samples/1_Utilities/bandwidthTest/bandwidthTest_vs2022.vcxproj
index 9faf45d5..676302bd 100644
--- a/Samples/1_Utilities/bandwidthTest/bandwidthTest_vs2022.vcxproj
+++ b/Samples/1_Utilities/bandwidthTest/bandwidthTest_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/bandwidthTest.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/1_Utilities/deviceQuery/Makefile b/Samples/1_Utilities/deviceQuery/Makefile
index ea0d25c1..44dd2fbc 100644
--- a/Samples/1_Utilities/deviceQuery/Makefile
+++ b/Samples/1_Utilities/deviceQuery/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/1_Utilities/deviceQuery/NsightEclipse.xml b/Samples/1_Utilities/deviceQuery/NsightEclipse.xml
index 842dea35..dda30eb7 100644
--- a/Samples/1_Utilities/deviceQuery/NsightEclipse.xml
+++ b/Samples/1_Utilities/deviceQuery/NsightEclipse.xml
@@ -5,12 +5,12 @@
   <cuda_api_list>
     <driver>cuDeviceGetAttribute</driver>
     <driver>cuSafeCallNoSync</driver>
-    <toolkit>cudaGetDeviceCount</toolkit>
-    <toolkit>cudaDriverGetVersion</toolkit>
-    <toolkit>cudaDeviceCanAccessPeer</toolkit>
-    <toolkit>cudaSetDevice</toolkit>
     <toolkit>cudaRuntimeGetVersion</toolkit>
     <toolkit>cudaGetErrorString</toolkit>
+    <toolkit>cudaDeviceCanAccessPeer</toolkit>
+    <toolkit>cudaSetDevice</toolkit>
+    <toolkit>cudaGetDeviceCount</toolkit>
+    <toolkit>cudaDriverGetVersion</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample enumerates the properties of the CUDA devices present in the system.]]></description>
@@ -48,6 +48,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/1_Utilities/deviceQuery/README.md b/Samples/1_Utilities/deviceQuery/README.md
index 794c5f76..4f4a647d 100644
--- a/Samples/1_Utilities/deviceQuery/README.md
+++ b/Samples/1_Utilities/deviceQuery/README.md
@@ -10,7 +10,7 @@ CUDA Runtime API, Device Query
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -26,11 +26,11 @@ x86_64, ppc64le, armv7l, aarch64
 cuDeviceGetAttribute, cuSafeCallNoSync
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaGetDeviceCount, cudaDriverGetVersion, cudaDeviceCanAccessPeer, cudaSetDevice, cudaRuntimeGetVersion, cudaGetErrorString, cudaGetDeviceProperties
+cudaRuntimeGetVersion, cudaGetErrorString, cudaDeviceCanAccessPeer, cudaSetDevice, cudaGetDeviceCount, cudaDriverGetVersion, cudaGetDeviceProperties
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/1_Utilities/deviceQuery/deviceQuery_vs2017.vcxproj b/Samples/1_Utilities/deviceQuery/deviceQuery_vs2017.vcxproj
index e52b7e5f..87cca12f 100644
--- a/Samples/1_Utilities/deviceQuery/deviceQuery_vs2017.vcxproj
+++ b/Samples/1_Utilities/deviceQuery/deviceQuery_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/deviceQuery.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/1_Utilities/deviceQuery/deviceQuery_vs2019.vcxproj b/Samples/1_Utilities/deviceQuery/deviceQuery_vs2019.vcxproj
index 3b4b1f75..41b5bebd 100644
--- a/Samples/1_Utilities/deviceQuery/deviceQuery_vs2019.vcxproj
+++ b/Samples/1_Utilities/deviceQuery/deviceQuery_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/deviceQuery.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/1_Utilities/deviceQuery/deviceQuery_vs2022.vcxproj b/Samples/1_Utilities/deviceQuery/deviceQuery_vs2022.vcxproj
index 4d9684d9..4ba036e5 100644
--- a/Samples/1_Utilities/deviceQuery/deviceQuery_vs2022.vcxproj
+++ b/Samples/1_Utilities/deviceQuery/deviceQuery_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/deviceQuery.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/1_Utilities/deviceQueryDrv/NsightEclipse.xml b/Samples/1_Utilities/deviceQueryDrv/NsightEclipse.xml
index 999dc70e..7a56e697 100644
--- a/Samples/1_Utilities/deviceQueryDrv/NsightEclipse.xml
+++ b/Samples/1_Utilities/deviceQueryDrv/NsightEclipse.xml
@@ -3,13 +3,14 @@
 <entry>
   <name>deviceQueryDrv</name>
   <cuda_api_list>
-    <driver>cuDeviceCanAccessPeer</driver>
-    <driver>cuDriverGetVersion</driver>
-    <driver>cuDeviceGetCount</driver>
     <driver>cuDeviceGetName</driver>
-    <driver>cuDeviceTotalMem</driver>
-    <driver>cuInit</driver>
     <driver>cuDeviceGetAttribute</driver>
+    <driver>cuDeviceTotalMem</driver>
+    <driver>cuDeviceCanAccessPeer</driver>
+    <driver>cuDeviceGetCount</driver>
+    <driver>cuDriverGetVersion</driver>
+    <driver>cuInit</driver>
+    <toolkit>cudaSetDevice</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample enumerates the properties of the CUDA devices present using CUDA Driver API calls]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -49,6 +50,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/1_Utilities/deviceQueryDrv/README.md b/Samples/1_Utilities/deviceQueryDrv/README.md
index 5d80066c..92d02352 100644
--- a/Samples/1_Utilities/deviceQueryDrv/README.md
+++ b/Samples/1_Utilities/deviceQueryDrv/README.md
@@ -10,7 +10,7 @@ CUDA Driver API, Device Query
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
-cuDeviceCanAccessPeer, cuDriverGetVersion, cuDeviceGetCount, cuDeviceGetName, cuDeviceTotalMem, cuInit, cuDeviceGetAttribute
+cuDeviceGetName, cuDeviceGetAttribute, cuDeviceTotalMem, cuDeviceCanAccessPeer, cuDeviceGetCount, cuDriverGetVersion, cuInit
+
+### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
+cudaSetDevice
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/1_Utilities/deviceQueryDrv/deviceQueryDrv_vs2017.vcxproj b/Samples/1_Utilities/deviceQueryDrv/deviceQueryDrv_vs2017.vcxproj
index a94caee2..59f77b9b 100644
--- a/Samples/1_Utilities/deviceQueryDrv/deviceQueryDrv_vs2017.vcxproj
+++ b/Samples/1_Utilities/deviceQueryDrv/deviceQueryDrv_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/1_Utilities/deviceQueryDrv/deviceQueryDrv_vs2019.vcxproj b/Samples/1_Utilities/deviceQueryDrv/deviceQueryDrv_vs2019.vcxproj
index 282fef95..629a2e6e 100644
--- a/Samples/1_Utilities/deviceQueryDrv/deviceQueryDrv_vs2019.vcxproj
+++ b/Samples/1_Utilities/deviceQueryDrv/deviceQueryDrv_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/1_Utilities/deviceQueryDrv/deviceQueryDrv_vs2022.vcxproj b/Samples/1_Utilities/deviceQueryDrv/deviceQueryDrv_vs2022.vcxproj
index 30aaeef4..5c8aab7e 100644
--- a/Samples/1_Utilities/deviceQueryDrv/deviceQueryDrv_vs2022.vcxproj
+++ b/Samples/1_Utilities/deviceQueryDrv/deviceQueryDrv_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/1_Utilities/topologyQuery/Makefile b/Samples/1_Utilities/topologyQuery/Makefile
index ea562174..9a48838c 100644
--- a/Samples/1_Utilities/topologyQuery/Makefile
+++ b/Samples/1_Utilities/topologyQuery/Makefile
@@ -297,9 +297,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/1_Utilities/topologyQuery/NsightEclipse.xml b/Samples/1_Utilities/topologyQuery/NsightEclipse.xml
index 722261a6..8bfd757d 100644
--- a/Samples/1_Utilities/topologyQuery/NsightEclipse.xml
+++ b/Samples/1_Utilities/topologyQuery/NsightEclipse.xml
@@ -46,6 +46,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/1_Utilities/topologyQuery/README.md b/Samples/1_Utilities/topologyQuery/README.md
index 4f4093fa..e08fa339 100644
--- a/Samples/1_Utilities/topologyQuery/README.md
+++ b/Samples/1_Utilities/topologyQuery/README.md
@@ -10,7 +10,7 @@ Performance Strategies, Multi-GPU
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -27,7 +27,7 @@ cudaGetDeviceCount, cudaDeviceGetAttribute
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/1_Utilities/topologyQuery/topologyQuery_vs2017.vcxproj b/Samples/1_Utilities/topologyQuery/topologyQuery_vs2017.vcxproj
index df094c22..feecd32f 100644
--- a/Samples/1_Utilities/topologyQuery/topologyQuery_vs2017.vcxproj
+++ b/Samples/1_Utilities/topologyQuery/topologyQuery_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/topologyQuery.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/1_Utilities/topologyQuery/topologyQuery_vs2019.vcxproj b/Samples/1_Utilities/topologyQuery/topologyQuery_vs2019.vcxproj
index 62f21c12..245f929b 100644
--- a/Samples/1_Utilities/topologyQuery/topologyQuery_vs2019.vcxproj
+++ b/Samples/1_Utilities/topologyQuery/topologyQuery_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/topologyQuery.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/1_Utilities/topologyQuery/topologyQuery_vs2022.vcxproj b/Samples/1_Utilities/topologyQuery/topologyQuery_vs2022.vcxproj
index 40ba0ca7..1d81b933 100644
--- a/Samples/1_Utilities/topologyQuery/topologyQuery_vs2022.vcxproj
+++ b/Samples/1_Utilities/topologyQuery/topologyQuery_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/topologyQuery.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/Makefile b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/Makefile
index 0200b235..dabf0516 100644
--- a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/Makefile
+++ b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/Makefile
@@ -301,9 +301,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/NsightEclipse.xml
index 0ab4b349..6df81e06 100644
--- a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/NsightEclipse.xml
+++ b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/NsightEclipse.xml
@@ -3,42 +3,42 @@
 <entry>
   <name>EGLStream_CUDA_CrossGPU</name>
   <cuda_api_list>
-    <driver>cuEGLStreamConsumerConnect</driver>
-    <driver>cuMemFree</driver>
-    <driver>cuInit</driver>
-    <driver>cuStreamCreate</driver>
-    <driver>cuCtxCreate</driver>
-    <driver>cuGraphicsResourceGetMappedEglFrame</driver>
     <driver>cuDeviceGetName</driver>
-    <driver>cuCtxSynchronize</driver>
-    <driver>cuEGLStreamConsumerAcquireFrame</driver>
-    <driver>cuDeviceGet</driver>
-    <driver>cuDeviceGetAttribute</driver>
-    <driver>cuMemAlloc</driver>
     <driver>cuEGLStreamConsumerReleaseFrame</driver>
+    <driver>cuEGLStreamConsumerConnect</driver>
+    <driver>cuEGLStreamConsumerDisconnect</driver>
+    <driver>cuCtxPushCurrent</driver>
+    <driver>cuEGLStreamProducerReturnFrame</driver>
+    <driver>cuStreamCreate</driver>
+    <driver>cuEGLStreamProducerPresentFrame</driver>
+    <driver>cuMemFree</driver>
+    <driver>cuGraphicsResourceGetMappedEglFrame</driver>
+    <driver>cuInit</driver>
+    <driver>cuMemcpyHtoD</driver>
+    <driver>cuDeviceGet</driver>
+    <driver>cuEGLStreamConsumerAcquireFrame</driver>
     <driver>cuEGLStreamProducerDisconnect</driver>
     <driver>cuEGLStreamProducerConnect</driver>
-    <driver>cuEGLStreamConsumerDisconnect</driver>
-    <driver>cuMemcpyHtoD</driver>
-    <driver>cuEGLStreamProducerReturnFrame</driver>
-    <driver>cuCtxPushCurrent</driver>
+    <driver>cuDeviceGetAttribute</driver>
+    <driver>cuCtxSynchronize</driver>
+    <driver>cuMemAlloc</driver>
     <driver>cuCtxPopCurrent</driver>
-    <driver>cuEGLStreamProducerPresentFrame</driver>
-    <toolkit>cudaDeviceCreateConsumer</toolkit>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaConsumerReleaseFrame</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaGetValueMismatch</toolkit>
-    <toolkit>cudaProducerDeinit</toolkit>
-    <toolkit>cudaProducerPresentFrame</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaProducerInit</toolkit>
-    <toolkit>cudaProducerReturnFrame</toolkit>
-    <toolkit>cudaProducerPrepareFrame</toolkit>
-    <toolkit>cudaConsumerAcquireFrame</toolkit>
+    <driver>cuCtxCreate</driver>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaProducerPresentFrame</toolkit>
+    <toolkit>cudaFree</toolkit>
     <toolkit>cudaGetErrorString</toolkit>
+    <toolkit>cudaConsumerReleaseFrame</toolkit>
+    <toolkit>cudaProducerReturnFrame</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaDeviceCreateProducer</toolkit>
+    <toolkit>cudaProducerDeinit</toolkit>
+    <toolkit>cudaProducerPrepareFrame</toolkit>
+    <toolkit>cudaGetValueMismatch</toolkit>
+    <toolkit>cudaConsumerAcquireFrame</toolkit>
+    <toolkit>cudaProducerInit</toolkit>
+    <toolkit>cudaDeviceCreateConsumer</toolkit>
   </cuda_api_list>
   <description><![CDATA[Demonstrates CUDA and EGL Streams interop, where consumer's EGL Stream is on one GPU and producer's on other and both consumer-producer are different processes.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -81,6 +81,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/README.md b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/README.md
index 2178db66..b559583b 100644
--- a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/README.md
+++ b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/README.md
@@ -10,7 +10,7 @@ EGLStreams Interop
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,17 +23,17 @@ x86_64, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
-cuEGLStreamConsumerConnect, cuMemFree, cuInit, cuStreamCreate, cuCtxCreate, cuGraphicsResourceGetMappedEglFrame, cuDeviceGetName, cuCtxSynchronize, cuEGLStreamConsumerAcquireFrame, cuDeviceGet, cuDeviceGetAttribute, cuMemAlloc, cuEGLStreamConsumerReleaseFrame, cuEGLStreamProducerDisconnect, cuEGLStreamProducerConnect, cuEGLStreamConsumerDisconnect, cuMemcpyHtoD, cuEGLStreamProducerReturnFrame, cuCtxPushCurrent, cuCtxPopCurrent, cuEGLStreamProducerPresentFrame
+cuDeviceGetName, cuEGLStreamConsumerReleaseFrame, cuEGLStreamConsumerConnect, cuEGLStreamConsumerDisconnect, cuCtxPushCurrent, cuEGLStreamProducerReturnFrame, cuStreamCreate, cuEGLStreamProducerPresentFrame, cuMemFree, cuGraphicsResourceGetMappedEglFrame, cuInit, cuMemcpyHtoD, cuDeviceGet, cuEGLStreamConsumerAcquireFrame, cuEGLStreamProducerDisconnect, cuEGLStreamProducerConnect, cuDeviceGetAttribute, cuCtxSynchronize, cuMemAlloc, cuCtxPopCurrent, cuCtxCreate
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaDeviceCreateConsumer, cudaFree, cudaConsumerReleaseFrame, cudaDeviceSynchronize, cudaGetValueMismatch, cudaProducerDeinit, cudaProducerPresentFrame, cudaMalloc, cudaProducerInit, cudaProducerReturnFrame, cudaProducerPrepareFrame, cudaConsumerAcquireFrame, cudaMemcpy, cudaGetErrorString, cudaDeviceCreateProducer
+cudaMemcpy, cudaMalloc, cudaProducerPresentFrame, cudaFree, cudaGetErrorString, cudaConsumerReleaseFrame, cudaProducerReturnFrame, cudaDeviceSynchronize, cudaDeviceCreateProducer, cudaProducerDeinit, cudaProducerPrepareFrame, cudaGetValueMismatch, cudaConsumerAcquireFrame, cudaProducerInit, cudaDeviceCreateConsumer
 
 ## Dependencies needed to build/run
 [EGL](../../../README.md#egl)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/findegl.mk b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/findegl.mk
index cfeee899..33ec1a96 100644
--- a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/findegl.mk
+++ b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/findegl.mk
@@ -58,6 +58,7 @@ ifeq ("$(TARGET_OS)","linux")
     RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
     CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -84,22 +85,12 @@ ifeq ("$(TARGET_OS)","linux")
         DFLT_PATH ?= /usr/lib
       endif
     endif
-    ifeq ("$(SUSE)","0")
+
+    ifeq ("$(SUSE)","0")
       GLPATH    ?= /usr/X11R6/lib64
       GLLINK    ?= -L/usr/X11R6/lib64
       DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(FEDORA)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(RHEL)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(CENTOS)","0")
+    else 
       GLPATH    ?= /usr/lib64/nvidia
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/NsightEclipse.xml
index d84d9313..ce22364a 100644
--- a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/NsightEclipse.xml
+++ b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/NsightEclipse.xml
@@ -3,23 +3,36 @@
 <entry>
   <name>EGLStream_CUDA_Interop</name>
   <cuda_api_list>
-    <driver>cuDeviceGet</driver>
-    <driver>cuDeviceGetAttribute</driver>
-    <driver>cuDeviceComputeCapability</driver>
-    <driver>cuDeviceGetCount</driver>
+    <driver>cuMemcpyDtoH</driver>
     <driver>cuDeviceGetName</driver>
-    <driver>cuGraphicsResourceGetMappedEglFrame</driver>
-    <driver>cuEGLStreamConsumerAcquireFrame</driver>
     <driver>cuEGLStreamConsumerReleaseFrame</driver>
-    <driver>cuEGLStreamProducerPresentFrame</driver>
-    <driver>cuCtxCreate</driver>
-    <driver>cuMemAlloc</driver>
-    <driver>cuMemFree</driver>
-    <driver>cuMemcpy3D</driver>
-    <driver>cuStreamCreate</driver>
+    <driver>cuEGLStreamConsumerConnect</driver>
+    <driver>cuEGLStreamConsumerDisconnect</driver>
     <driver>cuCtxPushCurrent</driver>
+    <driver>cuArrayDestroy</driver>
+    <driver>cuEGLStreamProducerReturnFrame</driver>
+    <driver>cuEGLStreamProducerPresentFrame</driver>
+    <driver>cuMemFree</driver>
+    <driver>cuGraphicsResourceGetMappedEglFrame</driver>
+    <driver>cuInit</driver>
+    <driver>cuEGLStreamConsumerAcquireFrame</driver>
+    <driver>cuEGLStreamProducerDisconnect</driver>
+    <driver>cuDeviceGetCount</driver>
+    <driver>cuEGLStreamProducerConnect</driver>
+    <driver>cuDeviceGetAttribute</driver>
+    <driver>cuCtxSynchronize</driver>
+    <driver>cuMemAlloc</driver>
     <driver>cuCtxPopCurrent</driver>
-    <driver>cuCtxDestroy</driver>
+    <driver>cuCtxCreate</driver>
+    <driver>cuMemcpy</driver>
+    <toolkit>cudaProducerReadYUVFrame</toolkit>
+    <toolkit>cudaProducerTest</toolkit>
+    <toolkit>cudaProducerDeinit</toolkit>
+    <toolkit>cudaDeviceCreateProducer</toolkit>
+    <toolkit>cudaProducerReadARGBFrame</toolkit>
+    <toolkit>cudaDeviceCreateConsumer</toolkit>
+    <toolkit>cudaConsumerTest</toolkit>
+    <toolkit>cudaProducerInit</toolkit>
   </cuda_api_list>
   <description><![CDATA[Demonstrates data exchange between CUDA and EGL Streams.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -53,6 +66,7 @@
   <sm-arch>sm37</sm-arch>
   <sm-arch>sm50</sm-arch>
   <sm-arch>sm52</sm-arch>
+  <sm-arch>sm53</sm-arch>
   <sm-arch>sm60</sm-arch>
   <sm-arch>sm61</sm-arch>
   <sm-arch>sm70</sm-arch>
@@ -60,6 +74,8 @@
   <sm-arch>sm75</sm-arch>
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
+  <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/README.md b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/README.md
index 3ccef85d..5cee12b0 100644
--- a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/README.md
+++ b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/README.md
@@ -10,7 +10,7 @@ EGLStreams Interop
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,17 +23,17 @@ x86_64, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
-cuEGLStreamConsumerConnect, cuArrayDestroy, cuMemFree, cuInit, cuCtxCreate, cuGraphicsResourceGetMappedEglFrame, cuDeviceGetName, cuCtxSynchronize, cuEGLStreamConsumerAcquireFrame, cuDeviceGetAttribute, cuMemcpy, cuMemAlloc, cuEGLStreamConsumerReleaseFrame, cuEGLStreamProducerDisconnect, cuEGLStreamProducerConnect, cuEGLStreamConsumerDisconnect, cuDeviceGetCount, cuEGLStreamProducerReturnFrame, cuCtxPushCurrent, cuCtxPopCurrent, cuMemcpyDtoH, cuEGLStreamProducerPresentFrame
+cuMemcpyDtoH, cuDeviceGetName, cuEGLStreamConsumerReleaseFrame, cuEGLStreamConsumerConnect, cuEGLStreamConsumerDisconnect, cuCtxPushCurrent, cuArrayDestroy, cuEGLStreamProducerReturnFrame, cuEGLStreamProducerPresentFrame, cuMemFree, cuGraphicsResourceGetMappedEglFrame, cuInit, cuEGLStreamConsumerAcquireFrame, cuEGLStreamProducerDisconnect, cuDeviceGetCount, cuEGLStreamProducerConnect, cuDeviceGetAttribute, cuCtxSynchronize, cuMemAlloc, cuCtxPopCurrent, cuCtxCreate, cuMemcpy
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaDeviceCreateConsumer, cudaConsumerTest, cudaProducerDeinit, cudaProducerInit, cudaProducerReadYUVFrame, cudaProducerTest, cudaProducerReadARGBFrame, cudaDeviceCreateProducer
+cudaProducerReadYUVFrame, cudaProducerTest, cudaProducerDeinit, cudaDeviceCreateProducer, cudaProducerReadARGBFrame, cudaDeviceCreateConsumer, cudaConsumerTest, cudaProducerInit
 
 ## Dependencies needed to build/run
 [EGL](../../../README.md#egl)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/cuda_producer.cpp b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/cuda_producer.cpp
index f15c5cd1..ef3adab2 100644
--- a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/cuda_producer.cpp
+++ b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/cuda_producer.cpp
@@ -301,7 +301,7 @@ CUresult cudaDeviceCreateProducer(test_cuda_producer_s *cudaProducer,
 
   if (major < 6) {
     printf(
-        "EGLStreams_CUDA_Interop requires SM 6.0 or higher arch GPU.  "
+        "EGLStream_CUDA_Interop requires SM 6.0 or higher arch GPU.  "
         "Exiting...\n");
     exit(2);  // EXIT_WAIVED
   }
diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/findegl.mk b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/findegl.mk
index cfeee899..33ec1a96 100644
--- a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/findegl.mk
+++ b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/findegl.mk
@@ -58,6 +58,7 @@ ifeq ("$(TARGET_OS)","linux")
     RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
     CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -84,22 +85,12 @@ ifeq ("$(TARGET_OS)","linux")
         DFLT_PATH ?= /usr/lib
       endif
     endif
-    ifeq ("$(SUSE)","0")
+
+    ifeq ("$(SUSE)","0")
       GLPATH    ?= /usr/X11R6/lib64
       GLLINK    ?= -L/usr/X11R6/lib64
       DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(FEDORA)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(RHEL)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(CENTOS)","0")
+    else 
       GLPATH    ?= /usr/lib64/nvidia
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
diff --git a/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/Makefile b/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/Makefile
index 9a670c78..86cfb928 100644
--- a/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/Makefile
+++ b/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/Makefile
@@ -321,9 +321,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/NsightEclipse.xml
index fbb91f1f..63995525 100644
--- a/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/NsightEclipse.xml
+++ b/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/NsightEclipse.xml
@@ -3,27 +3,27 @@
 <entry>
   <name>EGLSync_CUDAEvent_Interop</name>
   <cuda_api_list>
-    <driver>cuGraphicsEGLRegisterImage</driver>
-    <driver>cuStreamCreate</driver>
-    <driver>cuEventCreate</driver>
-    <driver>cuCtxCreate</driver>
-    <driver>cuGraphicsSubResourceGetMappedArray</driver>
-    <driver>cuGraphicsUnregisterResource</driver>
-    <driver>cuCtxSynchronize</driver>
-    <driver>cuEventCreateFromEGLSync</driver>
-    <driver>cuEventDestroy</driver>
-    <driver>cuStreamWaitEvent</driver>
-    <driver>cuCtxPushCurrent</driver>
-    <driver>cuSurfObjectCreate</driver>
-    <driver>cuInit</driver>
     <driver>cuEventRecord</driver>
     <driver>cuDeviceGetAttribute</driver>
+    <driver>cuEventCreate</driver>
+    <driver>cuCtxSynchronize</driver>
+    <driver>cuEventDestroy</driver>
+    <driver>cuGraphicsEGLRegisterImage</driver>
+    <driver>cuGraphicsSubResourceGetMappedArray</driver>
+    <driver>cuStreamCreate</driver>
+    <driver>cuStreamWaitEvent</driver>
+    <driver>cuGraphicsUnregisterResource</driver>
+    <driver>cuCtxCreate</driver>
+    <driver>cuSurfObjectCreate</driver>
+    <driver>cuEventCreateFromEGLSync</driver>
+    <driver>cuCtxPushCurrent</driver>
+    <driver>cuInit</driver>
+    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaGetErrorString</toolkit>
     <toolkit>cudaFree</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaGetValueMismatch</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
-    <toolkit>cudaGetErrorString</toolkit>
   </cuda_api_list>
   <description><![CDATA[Demonstrates interoperability between CUDA Event and EGL Sync/EGL Image using which one can achieve synchronization on GPU itself for GL-EGL-CUDA operations instead of blocking CPU for synchronization.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -72,6 +72,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>arm</arch>
diff --git a/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/README.md b/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/README.md
index 655881d1..8c980b52 100644
--- a/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/README.md
+++ b/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/README.md
@@ -10,7 +10,7 @@ EGLSync-CUDAEvent Interop, EGLImage-CUDA Interop
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,17 +23,17 @@ armv7l
 ## CUDA APIs involved
 
 ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
-cuGraphicsEGLRegisterImage, cuStreamCreate, cuEventCreate, cuCtxCreate, cuGraphicsSubResourceGetMappedArray, cuGraphicsUnregisterResource, cuCtxSynchronize, cuEventCreateFromEGLSync, cuEventDestroy, cuStreamWaitEvent, cuCtxPushCurrent, cuSurfObjectCreate, cuInit, cuEventRecord, cuDeviceGetAttribute
+cuEventRecord, cuDeviceGetAttribute, cuEventCreate, cuCtxSynchronize, cuEventDestroy, cuGraphicsEGLRegisterImage, cuGraphicsSubResourceGetMappedArray, cuStreamCreate, cuStreamWaitEvent, cuGraphicsUnregisterResource, cuCtxCreate, cuSurfObjectCreate, cuEventCreateFromEGLSync, cuCtxPushCurrent, cuInit
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaDeviceSynchronize, cudaGetValueMismatch, cudaMalloc, cudaMemcpy, cudaGetErrorString
+cudaMemcpy, cudaGetErrorString, cudaFree, cudaDeviceSynchronize, cudaGetValueMismatch, cudaMalloc
 
 ## Dependencies needed to build/run
 [EGL](../../../README.md#egl), [EGLSync](../../../README.md#eglsync), [X11](../../../README.md#x11), [GLES](../../../README.md#gles)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/findegl.mk b/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/findegl.mk
index cfeee899..33ec1a96 100644
--- a/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/findegl.mk
+++ b/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/findegl.mk
@@ -58,6 +58,7 @@ ifeq ("$(TARGET_OS)","linux")
     RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
     CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -84,22 +85,12 @@ ifeq ("$(TARGET_OS)","linux")
         DFLT_PATH ?= /usr/lib
       endif
     endif
+
     ifeq ("$(SUSE)","0")
       GLPATH    ?= /usr/X11R6/lib64
       GLLINK    ?= -L/usr/X11R6/lib64
       DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(FEDORA)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(RHEL)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(CENTOS)","0")
+    else
       GLPATH    ?= /usr/lib64/nvidia
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
diff --git a/Samples/2_Concepts_and_Techniques/FunctionPointers/FunctionPointers_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/FunctionPointers/FunctionPointers_vs2017.vcxproj
index 93f0c4b6..e80efd5b 100644
--- a/Samples/2_Concepts_and_Techniques/FunctionPointers/FunctionPointers_vs2017.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/FunctionPointers/FunctionPointers_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/FunctionPointers.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -118,6 +118,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/FunctionPointers/FunctionPointers_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/FunctionPointers/FunctionPointers_vs2019.vcxproj
index a67cbbcd..d5cbccdc 100644
--- a/Samples/2_Concepts_and_Techniques/FunctionPointers/FunctionPointers_vs2019.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/FunctionPointers/FunctionPointers_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/FunctionPointers.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -114,6 +114,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/FunctionPointers/FunctionPointers_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/FunctionPointers/FunctionPointers_vs2022.vcxproj
index 45617a67..ca5eb33b 100644
--- a/Samples/2_Concepts_and_Techniques/FunctionPointers/FunctionPointers_vs2022.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/FunctionPointers/FunctionPointers_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/FunctionPointers.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -114,6 +114,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/FunctionPointers/Makefile b/Samples/2_Concepts_and_Techniques/FunctionPointers/Makefile
index 47c204fe..651a4f81 100644
--- a/Samples/2_Concepts_and_Techniques/FunctionPointers/Makefile
+++ b/Samples/2_Concepts_and_Techniques/FunctionPointers/Makefile
@@ -299,9 +299,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/2_Concepts_and_Techniques/FunctionPointers/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/FunctionPointers/NsightEclipse.xml
index ecd90f9d..f90f7b34 100644
--- a/Samples/2_Concepts_and_Techniques/FunctionPointers/NsightEclipse.xml
+++ b/Samples/2_Concepts_and_Techniques/FunctionPointers/NsightEclipse.xml
@@ -3,21 +3,21 @@
 <entry>
   <name>FunctionPointers</name>
   <cuda_api_list>
-    <toolkit>cudaMemcpyToSymbol</toolkit>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaGraphicsMapResources</toolkit>
-    <toolkit>cudaFreeArray</toolkit>
-    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
-    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaDestroyTextureObject</toolkit>
-    <toolkit>cudaCreateTextureObject</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaMallocArray</toolkit>
-    <toolkit>cudaGraphicsUnregisterResource</toolkit>
     <toolkit>cudaGraphicsUnmapResources</toolkit>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaMallocArray</toolkit>
+    <toolkit>cudaFreeArray</toolkit>
+    <toolkit>cudaFree</toolkit>
     <toolkit>cudaMemcpyFromSymbol</toolkit>
+    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
+    <toolkit>cudaGraphicsMapResources</toolkit>
+    <toolkit>cudaDestroyTextureObject</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaCreateTextureObject</toolkit>
+    <toolkit>cudaMemcpyToSymbol</toolkit>
+    <toolkit>cudaGraphicsUnregisterResource</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample illustrates how to use function pointers and implements the Sobel Edge Detection filter for 8-bit monochrome images.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -88,6 +88,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/2_Concepts_and_Techniques/FunctionPointers/README.md b/Samples/2_Concepts_and_Techniques/FunctionPointers/README.md
index a95ba59d..83e16413 100644
--- a/Samples/2_Concepts_and_Techniques/FunctionPointers/README.md
+++ b/Samples/2_Concepts_and_Techniques/FunctionPointers/README.md
@@ -10,7 +10,7 @@ Graphics Interop, Image Processing
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemcpyToSymbol, cudaFree, cudaGraphicsMapResources, cudaFreeArray, cudaGraphicsGLRegisterBuffer, cudaGraphicsResourceGetMappedPointer, cudaDeviceSynchronize, cudaDestroyTextureObject, cudaCreateTextureObject, cudaMalloc, cudaMallocArray, cudaGraphicsUnregisterResource, cudaGraphicsUnmapResources, cudaMemcpy, cudaMemcpyFromSymbol
+cudaGraphicsUnmapResources, cudaMemcpy, cudaMallocArray, cudaFreeArray, cudaFree, cudaMemcpyFromSymbol, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDestroyTextureObject, cudaDeviceSynchronize, cudaCreateTextureObject, cudaMemcpyToSymbol, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer
 
 ## Dependencies needed to build/run
 [X11](../../../README.md#x11), [GL](../../../README.md#gl)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/2_Concepts_and_Techniques/FunctionPointers/findgllib.mk b/Samples/2_Concepts_and_Techniques/FunctionPointers/findgllib.mk
index f0a5c551..998fcf0f 100644
--- a/Samples/2_Concepts_and_Techniques/FunctionPointers/findgllib.mk
+++ b/Samples/2_Concepts_and_Techniques/FunctionPointers/findgllib.mk
@@ -53,11 +53,12 @@ endif
 ifeq ("$(TARGET_OS)","linux")
     # $(info) >> findgllib.mk -> LINUX path <<<)
     # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside
-    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu      >/dev/null 2>&1; echo $$?)
-    FEDORA = $(shell echo $(DISTRO) | grep -i fedora      >/dev/null 2>&1; echo $$?)
-    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
-    CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
+    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu       >/dev/null 2>&1; echo $$?)
+    FEDORA = $(shell echo $(DISTRO) | grep -i fedora       >/dev/null 2>&1; echo $$?)
+    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel'  >/dev/null 2>&1; echo $$?)
+    CENTOS = $(shell echo $(DISTRO) | grep -i centos       >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux")
         DFLT_PATH ?= /usr/lib
       endif
     endif
+
     ifeq ("$(SUSE)","0")
       GLPATH    ?= /usr/X11R6/lib64
       GLLINK    ?= -L/usr/X11R6/lib64
       DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(FEDORA)","0")
+    else 
       GLPATH    ?= /usr/lib64/nvidia
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
-    ifeq ("$(RHEL)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(CENTOS)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-  
+      
   # find libGL, libGLU 
   GLLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so  -print 2>/dev/null)
   GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null)
diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/MC_EstimatePiInlineP_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/MC_EstimatePiInlineP_vs2017.vcxproj
index 7826468d..9b6616f6 100644
--- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/MC_EstimatePiInlineP_vs2017.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/MC_EstimatePiInlineP_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/MC_EstimatePiInlineP.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -111,6 +111,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/MC_EstimatePiInlineP_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/MC_EstimatePiInlineP_vs2019.vcxproj
index accf4529..fd17c3e2 100644
--- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/MC_EstimatePiInlineP_vs2019.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/MC_EstimatePiInlineP_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/MC_EstimatePiInlineP.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/MC_EstimatePiInlineP_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/MC_EstimatePiInlineP_vs2022.vcxproj
index 6ba15531..9d5110d5 100644
--- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/MC_EstimatePiInlineP_vs2022.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/MC_EstimatePiInlineP_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/MC_EstimatePiInlineP.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/Makefile b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/Makefile
index 6bc2ee93..c4a3fa53 100644
--- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/Makefile
+++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/NsightEclipse.xml
index 5c63395b..bf9c24d2 100644
--- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/NsightEclipse.xml
+++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/NsightEclipse.xml
@@ -3,13 +3,13 @@
 <entry>
   <name>MC_EstimatePiInlineP</name>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaFuncGetAttributes</toolkit>
-    <toolkit>cudaGetDeviceCount</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaSetDevice</toolkit>
     <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaGetErrorString</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaSetDevice</toolkit>
+    <toolkit>cudaGetDeviceCount</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaFuncGetAttributes</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample uses Monte Carlo simulation for Estimation of Pi (using inline PRNG).  This sample also uses the NVIDIA CURAND library.]]></description>
@@ -60,6 +60,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/README.md b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/README.md
index 53b09b57..a7d40678 100644
--- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/README.md
+++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/README.md
@@ -10,7 +10,7 @@ Random Number Generator, Computational Finance, CURAND Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaFuncGetAttributes, cudaGetDeviceCount, cudaMalloc, cudaSetDevice, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties
+cudaMemcpy, cudaGetErrorString, cudaFree, cudaSetDevice, cudaGetDeviceCount, cudaMalloc, cudaFuncGetAttributes, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [CURAND](../../../README.md#curand)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/MC_EstimatePiInlineQ_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/MC_EstimatePiInlineQ_vs2017.vcxproj
index d5ad649b..9ac21844 100644
--- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/MC_EstimatePiInlineQ_vs2017.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/MC_EstimatePiInlineQ_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/MC_EstimatePiInlineQ.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -111,6 +111,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/MC_EstimatePiInlineQ_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/MC_EstimatePiInlineQ_vs2019.vcxproj
index bdf88dd5..b8246a1a 100644
--- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/MC_EstimatePiInlineQ_vs2019.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/MC_EstimatePiInlineQ_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/MC_EstimatePiInlineQ.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/MC_EstimatePiInlineQ_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/MC_EstimatePiInlineQ_vs2022.vcxproj
index 4a8efc23..da748e1f 100644
--- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/MC_EstimatePiInlineQ_vs2022.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/MC_EstimatePiInlineQ_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/MC_EstimatePiInlineQ.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/Makefile b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/Makefile
index 20a15720..5d8b086b 100644
--- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/Makefile
+++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/NsightEclipse.xml
index ed2f8f7a..f087e82b 100644
--- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/NsightEclipse.xml
+++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/NsightEclipse.xml
@@ -3,13 +3,13 @@
 <entry>
   <name>MC_EstimatePiInlineQ</name>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaFuncGetAttributes</toolkit>
-    <toolkit>cudaGetDeviceCount</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaSetDevice</toolkit>
     <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaGetErrorString</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaSetDevice</toolkit>
+    <toolkit>cudaGetDeviceCount</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaFuncGetAttributes</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample uses Monte Carlo simulation for Estimation of Pi (using inline QRNG).  This sample also uses the NVIDIA CURAND library.]]></description>
@@ -60,6 +60,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/README.md b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/README.md
index 96e19b72..485c16aa 100644
--- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/README.md
+++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/README.md
@@ -10,7 +10,7 @@ Random Number Generator, Computational Finance, CURAND Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaFuncGetAttributes, cudaGetDeviceCount, cudaMalloc, cudaSetDevice, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties
+cudaMemcpy, cudaGetErrorString, cudaFree, cudaSetDevice, cudaGetDeviceCount, cudaMalloc, cudaFuncGetAttributes, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [CURAND](../../../README.md#curand)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/MC_EstimatePiP_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/MC_EstimatePiP_vs2017.vcxproj
index a628995c..c6418871 100644
--- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/MC_EstimatePiP_vs2017.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/MC_EstimatePiP_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/MC_EstimatePiP.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -111,6 +111,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/MC_EstimatePiP_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/MC_EstimatePiP_vs2019.vcxproj
index fb20f5f1..c188ae7d 100644
--- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/MC_EstimatePiP_vs2019.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/MC_EstimatePiP_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/MC_EstimatePiP.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/MC_EstimatePiP_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/MC_EstimatePiP_vs2022.vcxproj
index ec7b8f28..c9893f39 100644
--- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/MC_EstimatePiP_vs2022.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/MC_EstimatePiP_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/MC_EstimatePiP.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/Makefile b/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/Makefile
index fba6735f..0e5b4237 100644
--- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/Makefile
+++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/NsightEclipse.xml
index 6578c3f4..56a07dea 100644
--- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/NsightEclipse.xml
+++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/NsightEclipse.xml
@@ -3,13 +3,13 @@
 <entry>
   <name>MC_EstimatePiP</name>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaFuncGetAttributes</toolkit>
-    <toolkit>cudaGetDeviceCount</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaSetDevice</toolkit>
     <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaGetErrorString</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaSetDevice</toolkit>
+    <toolkit>cudaGetDeviceCount</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaFuncGetAttributes</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample uses Monte Carlo simulation for Estimation of Pi (using batch PRNG).  This sample also uses the NVIDIA CURAND library.]]></description>
@@ -60,6 +60,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/README.md b/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/README.md
index 098e77f8..4390385f 100644
--- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/README.md
+++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/README.md
@@ -10,7 +10,7 @@ Random Number Generator, Computational Finance, CURAND Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaFuncGetAttributes, cudaGetDeviceCount, cudaMalloc, cudaSetDevice, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties
+cudaMemcpy, cudaGetErrorString, cudaFree, cudaSetDevice, cudaGetDeviceCount, cudaMalloc, cudaFuncGetAttributes, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [CURAND](../../../README.md#curand)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/MC_EstimatePiQ_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/MC_EstimatePiQ_vs2017.vcxproj
index 83aba18a..95b72fe5 100644
--- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/MC_EstimatePiQ_vs2017.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/MC_EstimatePiQ_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/MC_EstimatePiQ.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -111,6 +111,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/MC_EstimatePiQ_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/MC_EstimatePiQ_vs2019.vcxproj
index ae4a6711..ff931633 100644
--- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/MC_EstimatePiQ_vs2019.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/MC_EstimatePiQ_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/MC_EstimatePiQ.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/MC_EstimatePiQ_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/MC_EstimatePiQ_vs2022.vcxproj
index 9e11377d..5714f941 100644
--- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/MC_EstimatePiQ_vs2022.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/MC_EstimatePiQ_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/MC_EstimatePiQ.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/Makefile b/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/Makefile
index 98d071f9..61ae97d2 100644
--- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/Makefile
+++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/NsightEclipse.xml
index 45497eea..71f9c101 100644
--- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/NsightEclipse.xml
+++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/NsightEclipse.xml
@@ -3,13 +3,13 @@
 <entry>
   <name>MC_EstimatePiQ</name>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaFuncGetAttributes</toolkit>
-    <toolkit>cudaGetDeviceCount</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaSetDevice</toolkit>
     <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaGetErrorString</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaSetDevice</toolkit>
+    <toolkit>cudaGetDeviceCount</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaFuncGetAttributes</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample uses Monte Carlo simulation for Estimation of Pi (using batch QRNG).  This sample also uses the NVIDIA CURAND library.]]></description>
@@ -60,6 +60,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/README.md b/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/README.md
index dd73101e..c6bac7b7 100644
--- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/README.md
+++ b/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/README.md
@@ -10,7 +10,7 @@ Random Number Generator, Computational Finance, CURAND Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaFuncGetAttributes, cudaGetDeviceCount, cudaMalloc, cudaSetDevice, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties
+cudaMemcpy, cudaGetErrorString, cudaFree, cudaSetDevice, cudaGetDeviceCount, cudaMalloc, cudaFuncGetAttributes, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [CURAND](../../../README.md#curand)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/MC_SingleAsianOptionP_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/MC_SingleAsianOptionP_vs2017.vcxproj
index df94f0e1..c9d46e45 100644
--- a/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/MC_SingleAsianOptionP_vs2017.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/MC_SingleAsianOptionP_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/MC_SingleAsianOptionP.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -112,6 +112,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/MC_SingleAsianOptionP_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/MC_SingleAsianOptionP_vs2019.vcxproj
index c6830be4..1a31f284 100644
--- a/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/MC_SingleAsianOptionP_vs2019.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/MC_SingleAsianOptionP_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/MC_SingleAsianOptionP.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/MC_SingleAsianOptionP_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/MC_SingleAsianOptionP_vs2022.vcxproj
index ab727ab0..a97d1d42 100644
--- a/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/MC_SingleAsianOptionP_vs2022.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/MC_SingleAsianOptionP_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/MC_SingleAsianOptionP.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/Makefile b/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/Makefile
index c2e3b080..97baec61 100644
--- a/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/Makefile
+++ b/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/NsightEclipse.xml
index 52a3844d..e11b104e 100644
--- a/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/NsightEclipse.xml
+++ b/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/NsightEclipse.xml
@@ -3,13 +3,13 @@
 <entry>
   <name>MC_SingleAsianOptionP</name>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaFuncGetAttributes</toolkit>
-    <toolkit>cudaGetDeviceCount</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaSetDevice</toolkit>
     <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaGetErrorString</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaSetDevice</toolkit>
+    <toolkit>cudaGetDeviceCount</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaFuncGetAttributes</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample uses Monte Carlo to simulate Single Asian Options using the NVIDIA CURAND library.]]></description>
@@ -61,6 +61,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/README.md b/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/README.md
index f2a986d5..f6f6cd1a 100644
--- a/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/README.md
+++ b/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/README.md
@@ -10,7 +10,7 @@ Random Number Generator, Computational Finance, CURAND Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaFuncGetAttributes, cudaGetDeviceCount, cudaMalloc, cudaSetDevice, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties
+cudaMemcpy, cudaGetErrorString, cudaFree, cudaSetDevice, cudaGetDeviceCount, cudaMalloc, cudaFuncGetAttributes, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [CURAND](../../../README.md#curand)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/2_Concepts_and_Techniques/README.md b/Samples/2_Concepts_and_Techniques/README.md
index 00265194..b407d80b 100644
--- a/Samples/2_Concepts_and_Techniques/README.md
+++ b/Samples/2_Concepts_and_Techniques/README.md
@@ -19,7 +19,7 @@ This sample demonstrates how Discrete Cosine Transform (DCT) for blocks of 8 by
 ### [EGLStream_CUDA_CrossGPU](./EGLStream_CUDA_CrossGPU)
 Demonstrates CUDA and EGL Streams interop, where consumer's EGL Stream is on one GPU and producer's on other and both consumer-producer are different processes.
 
-### [EGLStreams_CUDA_Interop](./EGLStreams_CUDA_Interop)
+### [EGLStream_CUDA_Interop](./EGLStream_CUDA_Interop)
 Demonstrates data exchange between CUDA and EGL Streams.
 
 ### [EGLSync_CUDAEvent_Interop](./EGLSync_CUDAEvent_Interop)
diff --git a/Samples/2_Concepts_and_Techniques/boxFilter/Makefile b/Samples/2_Concepts_and_Techniques/boxFilter/Makefile
index baec273e..ef50006a 100644
--- a/Samples/2_Concepts_and_Techniques/boxFilter/Makefile
+++ b/Samples/2_Concepts_and_Techniques/boxFilter/Makefile
@@ -299,9 +299,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/2_Concepts_and_Techniques/boxFilter/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/boxFilter/NsightEclipse.xml
index 60ada986..5dc27790 100644
--- a/Samples/2_Concepts_and_Techniques/boxFilter/NsightEclipse.xml
+++ b/Samples/2_Concepts_and_Techniques/boxFilter/NsightEclipse.xml
@@ -3,21 +3,21 @@
 <entry>
   <name>boxFilter</name>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaGraphicsMapResources</toolkit>
-    <toolkit>cudaFreeArray</toolkit>
-    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
-    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaDestroyTextureObject</toolkit>
-    <toolkit>cudaCreateTextureObject</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaMallocArray</toolkit>
-    <toolkit>cudaCreateChannelDesc</toolkit>
-    <toolkit>cudaGraphicsUnregisterResource</toolkit>
     <toolkit>cudaGraphicsUnmapResources</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaCreateChannelDesc</toolkit>
+    <toolkit>cudaMallocArray</toolkit>
+    <toolkit>cudaFreeArray</toolkit>
+    <toolkit>cudaFree</toolkit>
     <toolkit>cudaGetErrorString</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
+    <toolkit>cudaGraphicsMapResources</toolkit>
+    <toolkit>cudaDestroyTextureObject</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaCreateTextureObject</toolkit>
+    <toolkit>cudaGraphicsUnregisterResource</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
   </cuda_api_list>
   <description><![CDATA[Fast image box filter using CUDA with OpenGL rendering.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -84,6 +84,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/2_Concepts_and_Techniques/boxFilter/README.md b/Samples/2_Concepts_and_Techniques/boxFilter/README.md
index f4d1299d..5be86cee 100644
--- a/Samples/2_Concepts_and_Techniques/boxFilter/README.md
+++ b/Samples/2_Concepts_and_Techniques/boxFilter/README.md
@@ -10,7 +10,7 @@ Graphics Interop, Image Processing
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaGraphicsMapResources, cudaFreeArray, cudaGraphicsGLRegisterBuffer, cudaGraphicsResourceGetMappedPointer, cudaDeviceSynchronize, cudaDestroyTextureObject, cudaCreateTextureObject, cudaMalloc, cudaMallocArray, cudaCreateChannelDesc, cudaGraphicsUnregisterResource, cudaGraphicsUnmapResources, cudaMemcpy, cudaGetErrorString
+cudaGraphicsUnmapResources, cudaCreateChannelDesc, cudaMallocArray, cudaFreeArray, cudaFree, cudaGetErrorString, cudaMemcpy, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDestroyTextureObject, cudaDeviceSynchronize, cudaCreateTextureObject, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer
 
 ## Dependencies needed to build/run
 [X11](../../../README.md#x11), [GL](../../../README.md#gl)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/2_Concepts_and_Techniques/boxFilter/boxFilter_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/boxFilter/boxFilter_vs2017.vcxproj
index f5cef656..e37e5362 100644
--- a/Samples/2_Concepts_and_Techniques/boxFilter/boxFilter_vs2017.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/boxFilter/boxFilter_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/boxFilter.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -119,6 +119,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/boxFilter/boxFilter_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/boxFilter/boxFilter_vs2019.vcxproj
index 5e2b348e..08573611 100644
--- a/Samples/2_Concepts_and_Techniques/boxFilter/boxFilter_vs2019.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/boxFilter/boxFilter_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/boxFilter.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -115,6 +115,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/boxFilter/boxFilter_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/boxFilter/boxFilter_vs2022.vcxproj
index 635ab0f1..113b623b 100644
--- a/Samples/2_Concepts_and_Techniques/boxFilter/boxFilter_vs2022.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/boxFilter/boxFilter_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/boxFilter.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -115,6 +115,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/boxFilter/findgllib.mk b/Samples/2_Concepts_and_Techniques/boxFilter/findgllib.mk
index f0a5c551..998fcf0f 100644
--- a/Samples/2_Concepts_and_Techniques/boxFilter/findgllib.mk
+++ b/Samples/2_Concepts_and_Techniques/boxFilter/findgllib.mk
@@ -53,11 +53,12 @@ endif
 ifeq ("$(TARGET_OS)","linux")
     # $(info) >> findgllib.mk -> LINUX path <<<)
     # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside
-    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu      >/dev/null 2>&1; echo $$?)
-    FEDORA = $(shell echo $(DISTRO) | grep -i fedora      >/dev/null 2>&1; echo $$?)
-    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
-    CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
+    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu       >/dev/null 2>&1; echo $$?)
+    FEDORA = $(shell echo $(DISTRO) | grep -i fedora       >/dev/null 2>&1; echo $$?)
+    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel'  >/dev/null 2>&1; echo $$?)
+    CENTOS = $(shell echo $(DISTRO) | grep -i centos       >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux")
         DFLT_PATH ?= /usr/lib
       endif
     endif
+
     ifeq ("$(SUSE)","0")
       GLPATH    ?= /usr/X11R6/lib64
       GLLINK    ?= -L/usr/X11R6/lib64
       DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(FEDORA)","0")
+    else 
       GLPATH    ?= /usr/lib64/nvidia
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
-    ifeq ("$(RHEL)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(CENTOS)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-  
+      
   # find libGL, libGLU 
   GLLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so  -print 2>/dev/null)
   GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null)
diff --git a/Samples/2_Concepts_and_Techniques/convolutionSeparable/Makefile b/Samples/2_Concepts_and_Techniques/convolutionSeparable/Makefile
index c189aba9..dd13e54b 100644
--- a/Samples/2_Concepts_and_Techniques/convolutionSeparable/Makefile
+++ b/Samples/2_Concepts_and_Techniques/convolutionSeparable/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/2_Concepts_and_Techniques/convolutionSeparable/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/convolutionSeparable/NsightEclipse.xml
index a76eb569..6471a445 100644
--- a/Samples/2_Concepts_and_Techniques/convolutionSeparable/NsightEclipse.xml
+++ b/Samples/2_Concepts_and_Techniques/convolutionSeparable/NsightEclipse.xml
@@ -3,11 +3,11 @@
 <entry>
   <name>convolutionSeparable</name>
   <cuda_api_list>
-    <toolkit>cudaMemcpyToSymbol</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaFree</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaMemcpyToSymbol</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample implements a separable convolution filter of a 2D signal with a gaussian kernel.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -53,6 +53,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/2_Concepts_and_Techniques/convolutionSeparable/README.md b/Samples/2_Concepts_and_Techniques/convolutionSeparable/README.md
index defbd7ea..8afcf177 100644
--- a/Samples/2_Concepts_and_Techniques/convolutionSeparable/README.md
+++ b/Samples/2_Concepts_and_Techniques/convolutionSeparable/README.md
@@ -10,7 +10,7 @@ Image Processing, Data Parallel Algorithms
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemcpyToSymbol, cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaMemcpy
+cudaMemcpy, cudaFree, cudaDeviceSynchronize, cudaMemcpyToSymbol, cudaMalloc
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/2_Concepts_and_Techniques/convolutionSeparable/convolutionSeparable_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/convolutionSeparable/convolutionSeparable_vs2017.vcxproj
index 4cbaabb2..12140d61 100644
--- a/Samples/2_Concepts_and_Techniques/convolutionSeparable/convolutionSeparable_vs2017.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/convolutionSeparable/convolutionSeparable_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/convolutionSeparable.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -109,6 +109,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/convolutionSeparable/convolutionSeparable_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/convolutionSeparable/convolutionSeparable_vs2019.vcxproj
index c231867e..882b180c 100644
--- a/Samples/2_Concepts_and_Techniques/convolutionSeparable/convolutionSeparable_vs2019.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/convolutionSeparable/convolutionSeparable_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/convolutionSeparable.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/convolutionSeparable/convolutionSeparable_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/convolutionSeparable/convolutionSeparable_vs2022.vcxproj
index e1ae239a..0bfe92ac 100644
--- a/Samples/2_Concepts_and_Techniques/convolutionSeparable/convolutionSeparable_vs2022.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/convolutionSeparable/convolutionSeparable_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/convolutionSeparable.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/convolutionTexture/Makefile b/Samples/2_Concepts_and_Techniques/convolutionTexture/Makefile
index 5eaca29f..e0631211 100644
--- a/Samples/2_Concepts_and_Techniques/convolutionTexture/Makefile
+++ b/Samples/2_Concepts_and_Techniques/convolutionTexture/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/2_Concepts_and_Techniques/convolutionTexture/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/convolutionTexture/NsightEclipse.xml
index 622b0ebc..c2a9e145 100644
--- a/Samples/2_Concepts_and_Techniques/convolutionTexture/NsightEclipse.xml
+++ b/Samples/2_Concepts_and_Techniques/convolutionTexture/NsightEclipse.xml
@@ -3,15 +3,15 @@
 <entry>
   <name>convolutionTexture</name>
   <cuda_api_list>
-    <toolkit>cudaMemcpyToSymbol</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaMallocArray</toolkit>
+    <toolkit>cudaFreeArray</toolkit>
     <toolkit>cudaFree</toolkit>
     <toolkit>cudaMemcpyToArray</toolkit>
-    <toolkit>cudaFreeArray</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaCreateTextureObject</toolkit>
+    <toolkit>cudaMemcpyToSymbol</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaMallocArray</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
   </cuda_api_list>
   <description><![CDATA[Texture-based implementation of a separable 2D convolution with a gaussian kernel. Used for performance comparison against convolutionSeparable.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -59,6 +59,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/2_Concepts_and_Techniques/convolutionTexture/README.md b/Samples/2_Concepts_and_Techniques/convolutionTexture/README.md
index 11ef179a..b54a396c 100644
--- a/Samples/2_Concepts_and_Techniques/convolutionTexture/README.md
+++ b/Samples/2_Concepts_and_Techniques/convolutionTexture/README.md
@@ -10,7 +10,7 @@ Image Processing, Texture, Data Parallel Algorithms
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemcpyToSymbol, cudaFree, cudaMemcpyToArray, cudaFreeArray, cudaDeviceSynchronize, cudaCreateTextureObject, cudaMalloc, cudaMallocArray, cudaMemcpy
+cudaMemcpy, cudaMallocArray, cudaFreeArray, cudaFree, cudaMemcpyToArray, cudaDeviceSynchronize, cudaCreateTextureObject, cudaMemcpyToSymbol, cudaMalloc
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/2_Concepts_and_Techniques/convolutionTexture/convolutionTexture_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/convolutionTexture/convolutionTexture_vs2017.vcxproj
index 442fa53b..1769a595 100644
--- a/Samples/2_Concepts_and_Techniques/convolutionTexture/convolutionTexture_vs2017.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/convolutionTexture/convolutionTexture_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/convolutionTexture.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -109,6 +109,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/convolutionTexture/convolutionTexture_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/convolutionTexture/convolutionTexture_vs2019.vcxproj
index 936b6fe5..1a367ce1 100644
--- a/Samples/2_Concepts_and_Techniques/convolutionTexture/convolutionTexture_vs2019.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/convolutionTexture/convolutionTexture_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/convolutionTexture.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/convolutionTexture/convolutionTexture_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/convolutionTexture/convolutionTexture_vs2022.vcxproj
index 24511a34..d9ff12c8 100644
--- a/Samples/2_Concepts_and_Techniques/convolutionTexture/convolutionTexture_vs2022.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/convolutionTexture/convolutionTexture_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/convolutionTexture.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/cuHook/Makefile b/Samples/2_Concepts_and_Techniques/cuHook/Makefile
index 81ea73be..5986c7b7 100644
--- a/Samples/2_Concepts_and_Techniques/cuHook/Makefile
+++ b/Samples/2_Concepts_and_Techniques/cuHook/Makefile
@@ -329,9 +329,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/2_Concepts_and_Techniques/cuHook/README.md b/Samples/2_Concepts_and_Techniques/cuHook/README.md
index 05833d89..39ec1cbe 100644
--- a/Samples/2_Concepts_and_Techniques/cuHook/README.md
+++ b/Samples/2_Concepts_and_Techniques/cuHook/README.md
@@ -12,7 +12,7 @@ Debugging
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -25,14 +25,14 @@ x86_64, ppc64le
 ## CUDA APIs involved
 
 ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
-cuDeviceGetCount, cuCtxCreate, cuMemAlloc, cuHookInfo, cuHookRegisterCallback, cuHook, cuMemFree, cuInit, cuCtxDestroy
+cuHook, cuMemAlloc, cuHookInfo, cuHookRegisterCallback, cuCtxDestroy, cuMemFree, cuDeviceGetCount, cuCtxCreate, cuInit
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaDeviceReset
+cudaDeviceReset, cudaFree
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/2_Concepts_and_Techniques/dct8x8/Makefile b/Samples/2_Concepts_and_Techniques/dct8x8/Makefile
index b00e4d35..0540bc00 100644
--- a/Samples/2_Concepts_and_Techniques/dct8x8/Makefile
+++ b/Samples/2_Concepts_and_Techniques/dct8x8/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/2_Concepts_and_Techniques/dct8x8/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/dct8x8/NsightEclipse.xml
index 3bbe52f9..adb06756 100644
--- a/Samples/2_Concepts_and_Techniques/dct8x8/NsightEclipse.xml
+++ b/Samples/2_Concepts_and_Techniques/dct8x8/NsightEclipse.xml
@@ -3,13 +3,13 @@
 <entry>
   <name>dct8x8</name>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaFreeArray</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaDestroyTextureObject</toolkit>
-    <toolkit>cudaMallocPitch</toolkit>
-    <toolkit>cudaCreateTextureObject</toolkit>
     <toolkit>cudaMallocArray</toolkit>
+    <toolkit>cudaFreeArray</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaMallocPitch</toolkit>
+    <toolkit>cudaDestroyTextureObject</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaCreateTextureObject</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates how Discrete Cosine Transform (DCT) for blocks of 8 by 8 pixels can be performed using CUDA: a naive implementation by definition and a more traditional approach used in many libraries. As opposed to implementing DCT in a fragment shader, CUDA allows for an easier and more efficient implementation.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -57,6 +57,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/2_Concepts_and_Techniques/dct8x8/README.md b/Samples/2_Concepts_and_Techniques/dct8x8/README.md
index 337b2ba2..7e0e24f3 100644
--- a/Samples/2_Concepts_and_Techniques/dct8x8/README.md
+++ b/Samples/2_Concepts_and_Techniques/dct8x8/README.md
@@ -10,7 +10,7 @@ Image Processing, Video Compression
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaFreeArray, cudaDeviceSynchronize, cudaDestroyTextureObject, cudaMallocPitch, cudaCreateTextureObject, cudaMallocArray
+cudaMallocArray, cudaFreeArray, cudaFree, cudaMallocPitch, cudaDestroyTextureObject, cudaDeviceSynchronize, cudaCreateTextureObject
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/2_Concepts_and_Techniques/dct8x8/dct8x8_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/dct8x8/dct8x8_vs2017.vcxproj
index 11ef09af..5044890e 100644
--- a/Samples/2_Concepts_and_Techniques/dct8x8/dct8x8_vs2017.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/dct8x8/dct8x8_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/dct8x8.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -115,6 +115,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/dct8x8/dct8x8_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/dct8x8/dct8x8_vs2019.vcxproj
index 7eace209..8452c9db 100644
--- a/Samples/2_Concepts_and_Techniques/dct8x8/dct8x8_vs2019.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/dct8x8/dct8x8_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/dct8x8.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -111,6 +111,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/dct8x8/dct8x8_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/dct8x8/dct8x8_vs2022.vcxproj
index f7110841..b0fd7378 100644
--- a/Samples/2_Concepts_and_Techniques/dct8x8/dct8x8_vs2022.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/dct8x8/dct8x8_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/dct8x8.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -111,6 +111,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/eigenvalues/Makefile b/Samples/2_Concepts_and_Techniques/eigenvalues/Makefile
index 93e54441..54c371ea 100644
--- a/Samples/2_Concepts_and_Techniques/eigenvalues/Makefile
+++ b/Samples/2_Concepts_and_Techniques/eigenvalues/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/2_Concepts_and_Techniques/eigenvalues/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/eigenvalues/NsightEclipse.xml
index 0fce1129..74a13515 100644
--- a/Samples/2_Concepts_and_Techniques/eigenvalues/NsightEclipse.xml
+++ b/Samples/2_Concepts_and_Techniques/eigenvalues/NsightEclipse.xml
@@ -4,9 +4,9 @@
   <name>eigenvalues</name>
   <cuda_api_list>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaFree</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaFree</toolkit>
   </cuda_api_list>
   <description><![CDATA[The computation of all or a subset of all eigenvalues is an important problem in Linear Algebra, statistics, physics, and many other fields. This sample demonstrates a parallel implementation of a bisection algorithm for the computation of all eigenvalues of a tridiagonal symmetric matrix of arbitrary size with CUDA.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -52,6 +52,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/2_Concepts_and_Techniques/eigenvalues/README.md b/Samples/2_Concepts_and_Techniques/eigenvalues/README.md
index 7d217766..bef2e951 100644
--- a/Samples/2_Concepts_and_Techniques/eigenvalues/README.md
+++ b/Samples/2_Concepts_and_Techniques/eigenvalues/README.md
@@ -10,7 +10,7 @@ Linear Algebra
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMalloc, cudaFree, cudaDeviceSynchronize, cudaMemcpy
+cudaMalloc, cudaDeviceSynchronize, cudaMemcpy, cudaFree
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/2_Concepts_and_Techniques/eigenvalues/eigenvalues_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/eigenvalues/eigenvalues_vs2017.vcxproj
index 3f2ba5ed..e5a31279 100644
--- a/Samples/2_Concepts_and_Techniques/eigenvalues/eigenvalues_vs2017.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/eigenvalues/eigenvalues_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/eigenvalues.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -122,6 +122,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/eigenvalues/eigenvalues_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/eigenvalues/eigenvalues_vs2019.vcxproj
index 4f6a8152..8f846835 100644
--- a/Samples/2_Concepts_and_Techniques/eigenvalues/eigenvalues_vs2019.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/eigenvalues/eigenvalues_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/eigenvalues.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -118,6 +118,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/eigenvalues/eigenvalues_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/eigenvalues/eigenvalues_vs2022.vcxproj
index 0fd7a89a..32faad5e 100644
--- a/Samples/2_Concepts_and_Techniques/eigenvalues/eigenvalues_vs2022.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/eigenvalues/eigenvalues_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/eigenvalues.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -118,6 +118,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/histogram/Makefile b/Samples/2_Concepts_and_Techniques/histogram/Makefile
index d35c575c..c73f8a98 100644
--- a/Samples/2_Concepts_and_Techniques/histogram/Makefile
+++ b/Samples/2_Concepts_and_Techniques/histogram/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/2_Concepts_and_Techniques/histogram/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/histogram/NsightEclipse.xml
index 6147ce83..72e7c05b 100644
--- a/Samples/2_Concepts_and_Techniques/histogram/NsightEclipse.xml
+++ b/Samples/2_Concepts_and_Techniques/histogram/NsightEclipse.xml
@@ -3,10 +3,10 @@
 <entry>
   <name>histogram</name>
   <cuda_api_list>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaFree</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates efficient implementation of 64-bin and 256-bin histogram.]]></description>
@@ -52,6 +52,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/2_Concepts_and_Techniques/histogram/README.md b/Samples/2_Concepts_and_Techniques/histogram/README.md
index 4ec0ce47..8ddf8e58 100644
--- a/Samples/2_Concepts_and_Techniques/histogram/README.md
+++ b/Samples/2_Concepts_and_Techniques/histogram/README.md
@@ -10,7 +10,7 @@ Image Processing, Data Parallel Algorithms
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaMemcpy, cudaGetDeviceProperties
+cudaMemcpy, cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaGetDeviceProperties
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/2_Concepts_and_Techniques/histogram/histogram_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/histogram/histogram_vs2017.vcxproj
index f4cff9b5..39dd8378 100644
--- a/Samples/2_Concepts_and_Techniques/histogram/histogram_vs2017.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/histogram/histogram_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/histogram.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -110,6 +110,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/histogram/histogram_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/histogram/histogram_vs2019.vcxproj
index d2f26cb2..5bde10fc 100644
--- a/Samples/2_Concepts_and_Techniques/histogram/histogram_vs2019.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/histogram/histogram_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/histogram.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -106,6 +106,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/histogram/histogram_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/histogram/histogram_vs2022.vcxproj
index 22496c8f..a3e05c1a 100644
--- a/Samples/2_Concepts_and_Techniques/histogram/histogram_vs2022.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/histogram/histogram_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/histogram.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -106,6 +106,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/imageDenoising/Makefile b/Samples/2_Concepts_and_Techniques/imageDenoising/Makefile
index d452de66..37895080 100644
--- a/Samples/2_Concepts_and_Techniques/imageDenoising/Makefile
+++ b/Samples/2_Concepts_and_Techniques/imageDenoising/Makefile
@@ -299,9 +299,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/2_Concepts_and_Techniques/imageDenoising/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/imageDenoising/NsightEclipse.xml
index 0fbb8631..bef88766 100644
--- a/Samples/2_Concepts_and_Techniques/imageDenoising/NsightEclipse.xml
+++ b/Samples/2_Concepts_and_Techniques/imageDenoising/NsightEclipse.xml
@@ -3,19 +3,19 @@
 <entry>
   <name>imageDenoising</name>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaGraphicsMapResources</toolkit>
-    <toolkit>cudaGraphicsUnregisterResource</toolkit>
-    <toolkit>cudaFreeArray</toolkit>
-    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
-    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaCreateTextureObject</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaMallocArray</toolkit>
-    <toolkit>cudaGLRegisterBufferObject</toolkit>
     <toolkit>cudaGraphicsUnmapResources</toolkit>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaMallocArray</toolkit>
+    <toolkit>cudaFreeArray</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
+    <toolkit>cudaGraphicsMapResources</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaCreateTextureObject</toolkit>
+    <toolkit>cudaGLRegisterBufferObject</toolkit>
+    <toolkit>cudaGraphicsUnregisterResource</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates two adaptive image denoising techniques: KNN and NLM, based on computation of both geometric and color distance between texels. While both techniques are implemented in the DirectX SDK using shaders, massively speeded up variation of the latter technique, taking advantage of shared memory, is implemented in addition to DirectX counterparts.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -83,6 +83,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/2_Concepts_and_Techniques/imageDenoising/README.md b/Samples/2_Concepts_and_Techniques/imageDenoising/README.md
index e0ef23db..d5741a77 100644
--- a/Samples/2_Concepts_and_Techniques/imageDenoising/README.md
+++ b/Samples/2_Concepts_and_Techniques/imageDenoising/README.md
@@ -10,7 +10,7 @@ Image Processing
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaGraphicsMapResources, cudaGraphicsUnregisterResource, cudaFreeArray, cudaGraphicsGLRegisterBuffer, cudaGraphicsResourceGetMappedPointer, cudaDeviceSynchronize, cudaCreateTextureObject, cudaMalloc, cudaMallocArray, cudaGLRegisterBufferObject, cudaGraphicsUnmapResources, cudaMemcpy
+cudaGraphicsUnmapResources, cudaMemcpy, cudaMallocArray, cudaFreeArray, cudaFree, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDeviceSynchronize, cudaCreateTextureObject, cudaGLRegisterBufferObject, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer
 
 ## Dependencies needed to build/run
 [X11](../../../README.md#x11), [GL](../../../README.md#gl)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/2_Concepts_and_Techniques/imageDenoising/findgllib.mk b/Samples/2_Concepts_and_Techniques/imageDenoising/findgllib.mk
index f0a5c551..998fcf0f 100644
--- a/Samples/2_Concepts_and_Techniques/imageDenoising/findgllib.mk
+++ b/Samples/2_Concepts_and_Techniques/imageDenoising/findgllib.mk
@@ -53,11 +53,12 @@ endif
 ifeq ("$(TARGET_OS)","linux")
     # $(info) >> findgllib.mk -> LINUX path <<<)
     # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside
-    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu      >/dev/null 2>&1; echo $$?)
-    FEDORA = $(shell echo $(DISTRO) | grep -i fedora      >/dev/null 2>&1; echo $$?)
-    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
-    CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
+    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu       >/dev/null 2>&1; echo $$?)
+    FEDORA = $(shell echo $(DISTRO) | grep -i fedora       >/dev/null 2>&1; echo $$?)
+    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel'  >/dev/null 2>&1; echo $$?)
+    CENTOS = $(shell echo $(DISTRO) | grep -i centos       >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux")
         DFLT_PATH ?= /usr/lib
       endif
     endif
+
     ifeq ("$(SUSE)","0")
       GLPATH    ?= /usr/X11R6/lib64
       GLLINK    ?= -L/usr/X11R6/lib64
       DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(FEDORA)","0")
+    else 
       GLPATH    ?= /usr/lib64/nvidia
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
-    ifeq ("$(RHEL)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(CENTOS)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-  
+      
   # find libGL, libGLU 
   GLLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so  -print 2>/dev/null)
   GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null)
diff --git a/Samples/2_Concepts_and_Techniques/imageDenoising/imageDenoising_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/imageDenoising/imageDenoising_vs2017.vcxproj
index 866d8b33..bf03ff5b 100644
--- a/Samples/2_Concepts_and_Techniques/imageDenoising/imageDenoising_vs2017.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/imageDenoising/imageDenoising_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/imageDenoising.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -123,6 +123,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/imageDenoising/imageDenoising_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/imageDenoising/imageDenoising_vs2019.vcxproj
index c924e1a5..facb985e 100644
--- a/Samples/2_Concepts_and_Techniques/imageDenoising/imageDenoising_vs2019.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/imageDenoising/imageDenoising_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/imageDenoising.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -119,6 +119,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/imageDenoising/imageDenoising_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/imageDenoising/imageDenoising_vs2022.vcxproj
index 4af74bf3..ec0b7c63 100644
--- a/Samples/2_Concepts_and_Techniques/imageDenoising/imageDenoising_vs2022.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/imageDenoising/imageDenoising_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/imageDenoising.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -119,6 +119,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/inlinePTX/Makefile b/Samples/2_Concepts_and_Techniques/inlinePTX/Makefile
index 481f8a36..6b06a65e 100644
--- a/Samples/2_Concepts_and_Techniques/inlinePTX/Makefile
+++ b/Samples/2_Concepts_and_Techniques/inlinePTX/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/2_Concepts_and_Techniques/inlinePTX/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/inlinePTX/NsightEclipse.xml
index dd8e554c..b3298102 100644
--- a/Samples/2_Concepts_and_Techniques/inlinePTX/NsightEclipse.xml
+++ b/Samples/2_Concepts_and_Techniques/inlinePTX/NsightEclipse.xml
@@ -3,15 +3,15 @@
 <entry>
   <name>inlinePTX</name>
   <cuda_api_list>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaFree</toolkit>
     <toolkit>cudaMallocHost</toolkit>
+    <toolkit>cudaGetLastError</toolkit>
     <toolkit>cudaGridSize</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaBlockSize</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaFreeHost</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaGetLastError</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
   </cuda_api_list>
   <description><![CDATA[A simple test application that demonstrates a new CUDA 4.0 ability to embed PTX in a CUDA kernel.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -51,6 +51,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/2_Concepts_and_Techniques/inlinePTX/README.md b/Samples/2_Concepts_and_Techniques/inlinePTX/README.md
index 61009f1d..60d100d8 100644
--- a/Samples/2_Concepts_and_Techniques/inlinePTX/README.md
+++ b/Samples/2_Concepts_and_Techniques/inlinePTX/README.md
@@ -10,7 +10,7 @@ Performance Strategies, PTX Assembly, CUDA Driver API
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaMallocHost, cudaGridSize, cudaDeviceSynchronize, cudaBlockSize, cudaFreeHost, cudaMalloc, cudaGetLastError, cudaMemcpy
+cudaMemcpy, cudaFree, cudaMallocHost, cudaGetLastError, cudaGridSize, cudaBlockSize, cudaDeviceSynchronize, cudaFreeHost, cudaMalloc
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/2_Concepts_and_Techniques/inlinePTX/inlinePTX_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/inlinePTX/inlinePTX_vs2017.vcxproj
index ef564769..b54f0eca 100644
--- a/Samples/2_Concepts_and_Techniques/inlinePTX/inlinePTX_vs2017.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/inlinePTX/inlinePTX_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/inlinePTX.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/inlinePTX/inlinePTX_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/inlinePTX/inlinePTX_vs2019.vcxproj
index c843d4c8..59b6d335 100644
--- a/Samples/2_Concepts_and_Techniques/inlinePTX/inlinePTX_vs2019.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/inlinePTX/inlinePTX_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/inlinePTX.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/inlinePTX/inlinePTX_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/inlinePTX/inlinePTX_vs2022.vcxproj
index 6e0f9bbd..955dd6ca 100644
--- a/Samples/2_Concepts_and_Techniques/inlinePTX/inlinePTX_vs2022.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/inlinePTX/inlinePTX_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/inlinePTX.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/README.md b/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/README.md
index 8b06db79..3d99e87e 100644
--- a/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/README.md
+++ b/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/README.md
@@ -10,7 +10,7 @@ Performance Strategies, PTX Assembly, CUDA Driver API, Runtime Compilation
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,7 +23,7 @@ x86_64, ppc64le, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
-cuMemAlloc, cuLaunchKernel, cuCtxSynchronize, cuMemFree, cuMemcpyDtoH, cuModuleGetFunction
+cuMemcpyDtoH, cuLaunchKernel, cuCtxSynchronize, cuMemAlloc, cuMemFree, cuModuleGetFunction
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
 cudaBlockSize, cudaGridSize
@@ -33,7 +33,7 @@ cudaBlockSize, cudaGridSize
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/inlinePTX_nvrtc_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/inlinePTX_nvrtc_vs2017.vcxproj
index 7b167dbe..3d8dcc9f 100644
--- a/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/inlinePTX_nvrtc_vs2017.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/inlinePTX_nvrtc_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/inlinePTX_nvrtc_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/inlinePTX_nvrtc_vs2019.vcxproj
index 4e9539a9..be53ad45 100644
--- a/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/inlinePTX_nvrtc_vs2019.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/inlinePTX_nvrtc_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/inlinePTX_nvrtc_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/inlinePTX_nvrtc_vs2022.vcxproj
index 3eb36cb7..87de14f3 100644
--- a/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/inlinePTX_nvrtc_vs2022.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/inlinePTX_nvrtc_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/interval/Makefile b/Samples/2_Concepts_and_Techniques/interval/Makefile
index bb0e46f9..69cf0f08 100644
--- a/Samples/2_Concepts_and_Techniques/interval/Makefile
+++ b/Samples/2_Concepts_and_Techniques/interval/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/2_Concepts_and_Techniques/interval/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/interval/NsightEclipse.xml
index 5b7f3004..33d957a0 100644
--- a/Samples/2_Concepts_and_Techniques/interval/NsightEclipse.xml
+++ b/Samples/2_Concepts_and_Techniques/interval/NsightEclipse.xml
@@ -3,19 +3,19 @@
 <entry>
   <name>interval</name>
   <cuda_api_list>
-    <toolkit>cudaDeviceSetLimit</toolkit>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaFuncSetCacheConfig</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaSetDevice</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaEventDestroy</toolkit>
-    <toolkit>cudaGetLastError</toolkit>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaFuncSetCacheConfig</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaGetLastError</toolkit>
+    <toolkit>cudaSetDevice</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
+    <toolkit>cudaDeviceSetLimit</toolkit>
+    <toolkit>cudaEventDestroy</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[Interval arithmetic operators example.  Uses various C++ features (templates and recursion).  The recursive mode requires Compute SM 2.0 capabilities.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -55,6 +55,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/2_Concepts_and_Techniques/interval/README.md b/Samples/2_Concepts_and_Techniques/interval/README.md
index 2d336b2e..d13b6e9a 100644
--- a/Samples/2_Concepts_and_Techniques/interval/README.md
+++ b/Samples/2_Concepts_and_Techniques/interval/README.md
@@ -10,7 +10,7 @@ Recursion, Templates
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaDeviceSetLimit, cudaFree, cudaFuncSetCacheConfig, cudaEventRecord, cudaEventCreate, cudaEventElapsedTime, cudaDeviceSynchronize, cudaSetDevice, cudaMalloc, cudaEventDestroy, cudaGetLastError, cudaMemcpy, cudaGetDeviceProperties
+cudaMemcpy, cudaFuncSetCacheConfig, cudaMalloc, cudaFree, cudaGetLastError, cudaSetDevice, cudaDeviceSynchronize, cudaEventRecord, cudaDeviceSetLimit, cudaEventDestroy, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/2_Concepts_and_Techniques/interval/interval_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/interval/interval_vs2017.vcxproj
index 937345f7..2c71346d 100644
--- a/Samples/2_Concepts_and_Techniques/interval/interval_vs2017.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/interval/interval_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/interval.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -213,6 +213,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/interval/interval_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/interval/interval_vs2019.vcxproj
index 0b54fff8..43bea230 100644
--- a/Samples/2_Concepts_and_Techniques/interval/interval_vs2019.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/interval/interval_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/interval.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -209,6 +209,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/interval/interval_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/interval/interval_vs2022.vcxproj
index 4f5e0b4c..ff04d5b6 100644
--- a/Samples/2_Concepts_and_Techniques/interval/interval_vs2022.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/interval/interval_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/interval.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -209,6 +209,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/particles/Makefile b/Samples/2_Concepts_and_Techniques/particles/Makefile
index f322ac64..62b19bfb 100644
--- a/Samples/2_Concepts_and_Techniques/particles/Makefile
+++ b/Samples/2_Concepts_and_Techniques/particles/Makefile
@@ -324,9 +324,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/2_Concepts_and_Techniques/particles/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/particles/NsightEclipse.xml
index 0b16f690..b3fcd5ad 100644
--- a/Samples/2_Concepts_and_Techniques/particles/NsightEclipse.xml
+++ b/Samples/2_Concepts_and_Techniques/particles/NsightEclipse.xml
@@ -6,19 +6,19 @@
     <flag>--std=c++14</flag>
   </cflags>
   <cuda_api_list>
-    <toolkit>cudaMemcpyToSymbol</toolkit>
-    <toolkit>cudaMemset</toolkit>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaGraphicsMapResources</toolkit>
-    <toolkit>cudaGLInit</toolkit>
-    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
-    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaInit</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaGraphicsUnregisterResource</toolkit>
     <toolkit>cudaGraphicsUnmapResources</toolkit>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
+    <toolkit>cudaGraphicsMapResources</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaMemset</toolkit>
+    <toolkit>cudaMemcpyToSymbol</toolkit>
+    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
+    <toolkit>cudaGraphicsUnregisterResource</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaInit</toolkit>
+    <toolkit>cudaGLInit</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample uses CUDA to simulate and visualize a large set of particles and their physical interaction.  Adding "-particles=<N>" to the command line will allow users to set # of particles for simulation.  This example implements a uniform grid data structure using either atomic operations or a fast radix sort from the Thrust library]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -83,6 +83,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <sources>
     <extraheader>..\..\..\Common\param.h</extraheader>
     <extraheader>..\..\..\Common\paramgl.h</extraheader>
diff --git a/Samples/2_Concepts_and_Techniques/particles/README.md b/Samples/2_Concepts_and_Techniques/particles/README.md
index 0e1b6134..3b1a6974 100644
--- a/Samples/2_Concepts_and_Techniques/particles/README.md
+++ b/Samples/2_Concepts_and_Techniques/particles/README.md
@@ -10,7 +10,7 @@ Graphics Interop, Data Parallel Algorithms, Physically-Based Simulation, Perform
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemcpyToSymbol, cudaMemset, cudaFree, cudaGraphicsMapResources, cudaGLInit, cudaGraphicsGLRegisterBuffer, cudaGraphicsResourceGetMappedPointer, cudaDeviceSynchronize, cudaInit, cudaMalloc, cudaGraphicsUnregisterResource, cudaGraphicsUnmapResources, cudaMemcpy
+cudaGraphicsUnmapResources, cudaMemcpy, cudaFree, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDeviceSynchronize, cudaMemset, cudaMemcpyToSymbol, cudaGraphicsGLRegisterBuffer, cudaGraphicsUnregisterResource, cudaMalloc, cudaInit, cudaGLInit
 
 ## Dependencies needed to build/run
 [X11](../../../README.md#x11), [GL](../../../README.md#gl)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/2_Concepts_and_Techniques/particles/findgllib.mk b/Samples/2_Concepts_and_Techniques/particles/findgllib.mk
index f0a5c551..998fcf0f 100644
--- a/Samples/2_Concepts_and_Techniques/particles/findgllib.mk
+++ b/Samples/2_Concepts_and_Techniques/particles/findgllib.mk
@@ -53,11 +53,12 @@ endif
 ifeq ("$(TARGET_OS)","linux")
     # $(info) >> findgllib.mk -> LINUX path <<<)
     # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside
-    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu      >/dev/null 2>&1; echo $$?)
-    FEDORA = $(shell echo $(DISTRO) | grep -i fedora      >/dev/null 2>&1; echo $$?)
-    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
-    CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
+    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu       >/dev/null 2>&1; echo $$?)
+    FEDORA = $(shell echo $(DISTRO) | grep -i fedora       >/dev/null 2>&1; echo $$?)
+    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel'  >/dev/null 2>&1; echo $$?)
+    CENTOS = $(shell echo $(DISTRO) | grep -i centos       >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux")
         DFLT_PATH ?= /usr/lib
       endif
     endif
+
     ifeq ("$(SUSE)","0")
       GLPATH    ?= /usr/X11R6/lib64
       GLLINK    ?= -L/usr/X11R6/lib64
       DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(FEDORA)","0")
+    else
       GLPATH    ?= /usr/lib64/nvidia
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
-    ifeq ("$(RHEL)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(CENTOS)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-  
+
   # find libGL, libGLU 
   GLLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so  -print 2>/dev/null)
   GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null)
diff --git a/Samples/2_Concepts_and_Techniques/particles/particles_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/particles/particles_vs2017.vcxproj
index 8f28c764..d2c2a6f8 100644
--- a/Samples/2_Concepts_and_Techniques/particles/particles_vs2017.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/particles/particles_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/particles.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -129,6 +129,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/particles/particles_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/particles/particles_vs2019.vcxproj
index fba7fdcc..c739cc74 100644
--- a/Samples/2_Concepts_and_Techniques/particles/particles_vs2019.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/particles/particles_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/particles.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -125,6 +125,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/particles/particles_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/particles/particles_vs2022.vcxproj
index a915f4c0..77b78a23 100644
--- a/Samples/2_Concepts_and_Techniques/particles/particles_vs2022.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/particles/particles_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/particles.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -125,6 +125,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/radixSortThrust/Makefile b/Samples/2_Concepts_and_Techniques/radixSortThrust/Makefile
index df5ee47f..459e9a21 100644
--- a/Samples/2_Concepts_and_Techniques/radixSortThrust/Makefile
+++ b/Samples/2_Concepts_and_Techniques/radixSortThrust/Makefile
@@ -304,9 +304,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/2_Concepts_and_Techniques/radixSortThrust/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/radixSortThrust/NsightEclipse.xml
index c29e9f17..62fd4d55 100644
--- a/Samples/2_Concepts_and_Techniques/radixSortThrust/NsightEclipse.xml
+++ b/Samples/2_Concepts_and_Techniques/radixSortThrust/NsightEclipse.xml
@@ -6,13 +6,13 @@
     <flag>--std=c++14</flag>
   </cflags>
   <cuda_api_list>
-    <toolkit>cudaEventRecord</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
     <toolkit>cudaEventSynchronize</toolkit>
-    <toolkit>cudaEventDestroy</toolkit>
-    <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
     <toolkit>cudaGetDevice</toolkit>
+    <toolkit>cudaEventDestroy</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
+    <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates a very fast and efficient parallel radix sort uses Thrust library. The included RadixSort class can sort either key-value pairs (with float or unsigned integer keys) or keys only.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -58,6 +58,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/2_Concepts_and_Techniques/radixSortThrust/README.md b/Samples/2_Concepts_and_Techniques/radixSortThrust/README.md
index 835bfd17..4914b5d2 100644
--- a/Samples/2_Concepts_and_Techniques/radixSortThrust/README.md
+++ b/Samples/2_Concepts_and_Techniques/radixSortThrust/README.md
@@ -10,7 +10,7 @@ Data-Parallel Algorithms, Performance Strategies
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaEventRecord, cudaEventCreate, cudaEventElapsedTime, cudaEventSynchronize, cudaEventDestroy, cudaGetDeviceProperties, cudaGetDevice
+cudaEventSynchronize, cudaEventRecord, cudaGetDevice, cudaEventDestroy, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/2_Concepts_and_Techniques/radixSortThrust/radixSortThrust_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/radixSortThrust/radixSortThrust_vs2017.vcxproj
index 159d441e..a9c39d2e 100644
--- a/Samples/2_Concepts_and_Techniques/radixSortThrust/radixSortThrust_vs2017.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/radixSortThrust/radixSortThrust_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/radixSortThrust.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/radixSortThrust/radixSortThrust_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/radixSortThrust/radixSortThrust_vs2019.vcxproj
index 244e1889..9ac46af9 100644
--- a/Samples/2_Concepts_and_Techniques/radixSortThrust/radixSortThrust_vs2019.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/radixSortThrust/radixSortThrust_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/radixSortThrust.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/radixSortThrust/radixSortThrust_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/radixSortThrust/radixSortThrust_vs2022.vcxproj
index edef2e2d..30b2d9bb 100644
--- a/Samples/2_Concepts_and_Techniques/radixSortThrust/radixSortThrust_vs2022.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/radixSortThrust/radixSortThrust_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/radixSortThrust.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/reduction/Makefile b/Samples/2_Concepts_and_Techniques/reduction/Makefile
index a46a5bb3..eed9f801 100644
--- a/Samples/2_Concepts_and_Techniques/reduction/Makefile
+++ b/Samples/2_Concepts_and_Techniques/reduction/Makefile
@@ -310,9 +310,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/2_Concepts_and_Techniques/reduction/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/reduction/NsightEclipse.xml
index e7fa89ac..629ec3ff 100644
--- a/Samples/2_Concepts_and_Techniques/reduction/NsightEclipse.xml
+++ b/Samples/2_Concepts_and_Techniques/reduction/NsightEclipse.xml
@@ -6,13 +6,13 @@
     <flag>--std=c++11</flag>
   </cflags>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaSetDevice</toolkit>
     <toolkit>cudaMemcpy</toolkit>
-    <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaSetDevice</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaGetDevice</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[A parallel sum reduction that computes the sum of a large arrays of values. This sample demonstrates several important optimization strategies for Data-Parallel Algorithms like reduction using shared memory, __shfl_down_sync, __reduce_add_sync and cooperative_groups reduce.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -67,6 +67,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/2_Concepts_and_Techniques/reduction/README.md b/Samples/2_Concepts_and_Techniques/reduction/README.md
index 1fde9b55..65024c63 100644
--- a/Samples/2_Concepts_and_Techniques/reduction/README.md
+++ b/Samples/2_Concepts_and_Techniques/reduction/README.md
@@ -10,7 +10,7 @@ Data-Parallel Algorithms, Performance Strategies
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaSetDevice, cudaMemcpy, cudaGetDeviceProperties, cudaGetDevice
+cudaMemcpy, cudaFree, cudaSetDevice, cudaDeviceSynchronize, cudaGetDevice, cudaMalloc, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [CPP11](../../../README.md#cpp11)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/2_Concepts_and_Techniques/reduction/reduction_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/reduction/reduction_vs2017.vcxproj
index 65d31a5d..f80b1766 100644
--- a/Samples/2_Concepts_and_Techniques/reduction/reduction_vs2017.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/reduction/reduction_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/reduction.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/reduction/reduction_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/reduction/reduction_vs2019.vcxproj
index b5b673a3..dea43eab 100644
--- a/Samples/2_Concepts_and_Techniques/reduction/reduction_vs2019.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/reduction/reduction_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/reduction.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/reduction/reduction_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/reduction/reduction_vs2022.vcxproj
index a3f39519..bbc6826a 100644
--- a/Samples/2_Concepts_and_Techniques/reduction/reduction_vs2022.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/reduction/reduction_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/reduction.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/Makefile b/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/Makefile
index 32bdaeb7..2f62c73e 100644
--- a/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/Makefile
+++ b/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/Makefile
@@ -322,9 +322,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 61 70 72 75 80 86 87
+SMS ?= 61 70 72 75 80 86 87 90
 else
-SMS ?= 60 61 70 75 80 86
+SMS ?= 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/NsightEclipse.xml
index bc231be2..09decc91 100644
--- a/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/NsightEclipse.xml
+++ b/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/NsightEclipse.xml
@@ -6,15 +6,15 @@
     <flag>--std=c++11</flag>
   </cflags>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaOccupancyMaxActiveBlocksPerMultiprocessor</toolkit>
-    <toolkit>cudaOccupancyMaxPotentialBlockSize</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaSetDevice</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaLaunchCooperativeKernel</toolkit>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaSetDevice</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaLaunchCooperativeKernel</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaOccupancyMaxActiveBlocksPerMultiprocessor</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaOccupancyMaxPotentialBlockSize</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates single pass reduction using Multi Block Cooperative Groups.  This sample requires devices with compute capability 6.0 or higher having compute preemption.]]></description>
   <includepaths>
@@ -51,6 +51,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/README.md b/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/README.md
index b0d5e58f..7473bae9 100644
--- a/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/README.md
+++ b/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/README.md
@@ -10,7 +10,7 @@ Cooperative Groups, MultiBlock Cooperative Groups
 
 ## Supported SM Architectures
 
-[SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaOccupancyMaxPotentialBlockSize, cudaDeviceSynchronize, cudaSetDevice, cudaMalloc, cudaLaunchCooperativeKernel, cudaMemcpy, cudaGetDeviceProperties
+cudaMemcpy, cudaFree, cudaSetDevice, cudaDeviceSynchronize, cudaLaunchCooperativeKernel, cudaMalloc, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaGetDeviceProperties, cudaOccupancyMaxPotentialBlockSize
 
 ## Dependencies needed to build/run
 [MBCG](../../../README.md#mbcg), [CPP11](../../../README.md#cpp11)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/reductionMultiBlockCG_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/reductionMultiBlockCG_vs2017.vcxproj
index 4a505b43..c467625a 100644
--- a/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/reductionMultiBlockCG_vs2017.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/reductionMultiBlockCG_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/reductionMultiBlockCG.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/reductionMultiBlockCG_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/reductionMultiBlockCG_vs2019.vcxproj
index 1e0a9cd1..306b8c6d 100644
--- a/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/reductionMultiBlockCG_vs2019.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/reductionMultiBlockCG_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/reductionMultiBlockCG.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/reductionMultiBlockCG_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/reductionMultiBlockCG_vs2022.vcxproj
index e5b2eb9f..a7261ba7 100644
--- a/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/reductionMultiBlockCG_vs2022.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/reductionMultiBlockCG_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/reductionMultiBlockCG.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/scalarProd/Makefile b/Samples/2_Concepts_and_Techniques/scalarProd/Makefile
index e20a3810..9cacc538 100644
--- a/Samples/2_Concepts_and_Techniques/scalarProd/Makefile
+++ b/Samples/2_Concepts_and_Techniques/scalarProd/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/2_Concepts_and_Techniques/scalarProd/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/scalarProd/NsightEclipse.xml
index 6d36f764..79a458f3 100644
--- a/Samples/2_Concepts_and_Techniques/scalarProd/NsightEclipse.xml
+++ b/Samples/2_Concepts_and_Techniques/scalarProd/NsightEclipse.xml
@@ -4,9 +4,9 @@
   <name>scalarProd</name>
   <cuda_api_list>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaFree</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaFree</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample calculates scalar products of a given set of input vector pairs.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -46,6 +46,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/2_Concepts_and_Techniques/scalarProd/README.md b/Samples/2_Concepts_and_Techniques/scalarProd/README.md
index 3b54a966..47ff8e57 100644
--- a/Samples/2_Concepts_and_Techniques/scalarProd/README.md
+++ b/Samples/2_Concepts_and_Techniques/scalarProd/README.md
@@ -10,7 +10,7 @@ Linear Algebra
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMalloc, cudaFree, cudaDeviceSynchronize, cudaMemcpy
+cudaMalloc, cudaDeviceSynchronize, cudaMemcpy, cudaFree
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/2_Concepts_and_Techniques/scalarProd/scalarProd_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/scalarProd/scalarProd_vs2017.vcxproj
index 087bc7df..d404cd61 100644
--- a/Samples/2_Concepts_and_Techniques/scalarProd/scalarProd_vs2017.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/scalarProd/scalarProd_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/scalarProd.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/scalarProd/scalarProd_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/scalarProd/scalarProd_vs2019.vcxproj
index bfb89fa1..72e9579a 100644
--- a/Samples/2_Concepts_and_Techniques/scalarProd/scalarProd_vs2019.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/scalarProd/scalarProd_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/scalarProd.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/scalarProd/scalarProd_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/scalarProd/scalarProd_vs2022.vcxproj
index 0aabdbf4..cb130eaf 100644
--- a/Samples/2_Concepts_and_Techniques/scalarProd/scalarProd_vs2022.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/scalarProd/scalarProd_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/scalarProd.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/scan/Makefile b/Samples/2_Concepts_and_Techniques/scan/Makefile
index 5d371f2f..8ce4ab79 100644
--- a/Samples/2_Concepts_and_Techniques/scan/Makefile
+++ b/Samples/2_Concepts_and_Techniques/scan/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/2_Concepts_and_Techniques/scan/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/scan/NsightEclipse.xml
index 239b68b1..11e6c2f3 100644
--- a/Samples/2_Concepts_and_Techniques/scan/NsightEclipse.xml
+++ b/Samples/2_Concepts_and_Techniques/scan/NsightEclipse.xml
@@ -4,9 +4,9 @@
   <name>scan</name>
   <cuda_api_list>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaFree</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaFree</toolkit>
   </cuda_api_list>
   <description><![CDATA[This example demonstrates an efficient CUDA implementation of parallel prefix sum, also known as "scan".  Given an array of numbers, scan computes a new array in which each element is the sum of all the elements before it in the input array.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -50,6 +50,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/2_Concepts_and_Techniques/scan/README.md b/Samples/2_Concepts_and_Techniques/scan/README.md
index 8a0563f4..e10d0a38 100644
--- a/Samples/2_Concepts_and_Techniques/scan/README.md
+++ b/Samples/2_Concepts_and_Techniques/scan/README.md
@@ -10,7 +10,7 @@ Data-Parallel Algorithms, Performance Strategies
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMalloc, cudaFree, cudaDeviceSynchronize, cudaMemcpy
+cudaMalloc, cudaDeviceSynchronize, cudaMemcpy, cudaFree
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/2_Concepts_and_Techniques/scan/scan_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/scan/scan_vs2017.vcxproj
index 37db8569..ac2bd4bd 100644
--- a/Samples/2_Concepts_and_Techniques/scan/scan_vs2017.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/scan/scan_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/scan.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -109,6 +109,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/scan/scan_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/scan/scan_vs2019.vcxproj
index b6da5f75..fddb8498 100644
--- a/Samples/2_Concepts_and_Techniques/scan/scan_vs2019.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/scan/scan_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/scan.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/scan/scan_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/scan/scan_vs2022.vcxproj
index cb3e2bed..4f238933 100644
--- a/Samples/2_Concepts_and_Techniques/scan/scan_vs2022.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/scan/scan_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/scan.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/Makefile b/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/Makefile
index 9f15915f..fe1a04ef 100644
--- a/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/Makefile
+++ b/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/Makefile
@@ -304,9 +304,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/NsightEclipse.xml
index dd3363ea..bfabb713 100644
--- a/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/NsightEclipse.xml
+++ b/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/NsightEclipse.xml
@@ -7,13 +7,13 @@
     <flag>--threads 1</flag>
   </cflags>
   <cuda_api_list>
-    <toolkit>cudaMemset</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
-    <toolkit>cudaEventSynchronize</toolkit>
-    <toolkit>cudaMemGetInfo</toolkit>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaMemGetInfo</toolkit>
+    <toolkit>cudaEventSynchronize</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
+    <toolkit>cudaMemset</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates an approach to the image segmentation trees construction.  This method is based on Boruvka's MST algorithm.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -60,6 +60,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/README.md b/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/README.md
index cd6cbc5a..b6292353 100644
--- a/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/README.md
+++ b/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/README.md
@@ -10,7 +10,7 @@ Data-Parallel Algorithms, Performance Strategies
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemset, cudaEventRecord, cudaEventCreate, cudaEventElapsedTime, cudaEventSynchronize, cudaMemGetInfo, cudaMemcpy
+cudaMemcpy, cudaMemGetInfo, cudaEventSynchronize, cudaEventRecord, cudaMemset, cudaEventElapsedTime, cudaEventCreate
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/segmentationTreeThrust_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/segmentationTreeThrust_vs2017.vcxproj
index 14b777a6..d72fc3b5 100644
--- a/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/segmentationTreeThrust_vs2017.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/segmentationTreeThrust_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/segmentationTreeThrust.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 1 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/segmentationTreeThrust_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/segmentationTreeThrust_vs2019.vcxproj
index 591302fe..96fd3760 100644
--- a/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/segmentationTreeThrust_vs2019.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/segmentationTreeThrust_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/segmentationTreeThrust.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 1 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/segmentationTreeThrust_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/segmentationTreeThrust_vs2022.vcxproj
index e82eceb4..18e37f1f 100644
--- a/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/segmentationTreeThrust_vs2022.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/segmentationTreeThrust_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/segmentationTreeThrust.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 1 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/shfl_scan/Makefile b/Samples/2_Concepts_and_Techniques/shfl_scan/Makefile
index 256e10fd..4f210b3e 100644
--- a/Samples/2_Concepts_and_Techniques/shfl_scan/Makefile
+++ b/Samples/2_Concepts_and_Techniques/shfl_scan/Makefile
@@ -310,9 +310,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/2_Concepts_and_Techniques/shfl_scan/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/shfl_scan/NsightEclipse.xml
index 8410c325..d6060063 100644
--- a/Samples/2_Concepts_and_Techniques/shfl_scan/NsightEclipse.xml
+++ b/Samples/2_Concepts_and_Techniques/shfl_scan/NsightEclipse.xml
@@ -7,18 +7,18 @@
     <flag>-O3</flag>
   </cflags>
   <cuda_api_list>
-    <toolkit>cudaMemset</toolkit>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
-    <toolkit>cudaMallocHost</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
-    <toolkit>cudaEventSynchronize</toolkit>
-    <toolkit>cudaFreeHost</toolkit>
-    <toolkit>cudaMalloc</toolkit>
     <toolkit>cudaMemcpy</toolkit>
-    <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaMallocHost</toolkit>
+    <toolkit>cudaEventSynchronize</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
+    <toolkit>cudaFreeHost</toolkit>
     <toolkit>cudaGetDevice</toolkit>
+    <toolkit>cudaMemset</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
+    <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[This example demonstrates how to use the shuffle intrinsic __shfl_up_sync to perform a scan operation across a thread block. ]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -66,6 +66,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/2_Concepts_and_Techniques/shfl_scan/README.md b/Samples/2_Concepts_and_Techniques/shfl_scan/README.md
index f042bb79..5afaefea 100644
--- a/Samples/2_Concepts_and_Techniques/shfl_scan/README.md
+++ b/Samples/2_Concepts_and_Techniques/shfl_scan/README.md
@@ -10,7 +10,7 @@ Data-Parallel Algorithms, Performance Strategies
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemset, cudaFree, cudaEventRecord, cudaMallocHost, cudaEventCreate, cudaEventElapsedTime, cudaEventSynchronize, cudaFreeHost, cudaMalloc, cudaMemcpy, cudaGetDeviceProperties, cudaGetDevice
+cudaMemcpy, cudaFree, cudaMallocHost, cudaEventSynchronize, cudaEventRecord, cudaFreeHost, cudaGetDevice, cudaMemset, cudaMalloc, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate
 
 ## Dependencies needed to build/run
 [CPP11](../../../README.md#cpp11)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/2_Concepts_and_Techniques/shfl_scan/shfl_scan_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/shfl_scan/shfl_scan_vs2017.vcxproj
index ee4c6348..9e801711 100644
--- a/Samples/2_Concepts_and_Techniques/shfl_scan/shfl_scan_vs2017.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/shfl_scan/shfl_scan_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/shfl_scan.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/shfl_scan/shfl_scan_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/shfl_scan/shfl_scan_vs2019.vcxproj
index c99b3fe4..72076c47 100644
--- a/Samples/2_Concepts_and_Techniques/shfl_scan/shfl_scan_vs2019.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/shfl_scan/shfl_scan_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/shfl_scan.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/shfl_scan/shfl_scan_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/shfl_scan/shfl_scan_vs2022.vcxproj
index ec328ba6..3ad9db48 100644
--- a/Samples/2_Concepts_and_Techniques/shfl_scan/shfl_scan_vs2022.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/shfl_scan/shfl_scan_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/shfl_scan.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/sortingNetworks/Makefile b/Samples/2_Concepts_and_Techniques/sortingNetworks/Makefile
index a2de7cb9..7d715f26 100644
--- a/Samples/2_Concepts_and_Techniques/sortingNetworks/Makefile
+++ b/Samples/2_Concepts_and_Techniques/sortingNetworks/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/2_Concepts_and_Techniques/sortingNetworks/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/sortingNetworks/NsightEclipse.xml
index bec0501a..3ef62721 100644
--- a/Samples/2_Concepts_and_Techniques/sortingNetworks/NsightEclipse.xml
+++ b/Samples/2_Concepts_and_Techniques/sortingNetworks/NsightEclipse.xml
@@ -4,9 +4,9 @@
   <name>sortingNetworks</name>
   <cuda_api_list>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaFree</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaFree</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample implements bitonic sort and odd-even merge sort (also known as Batcher's sort), algorithms belonging to the class of sorting networks. While generally subefficient, for large sequences compared to algorithms with better asymptotic algorithmic complexity (i.e. merge sort or radix sort), this may be the preferred algorithms of choice for sorting batches of short-sized to mid-sized (key, value) array pairs. Refer to an excellent tutorial by H. W. Lang http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/networks/indexen.htm]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -46,6 +46,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/2_Concepts_and_Techniques/sortingNetworks/README.md b/Samples/2_Concepts_and_Techniques/sortingNetworks/README.md
index 905e698f..707ed30d 100644
--- a/Samples/2_Concepts_and_Techniques/sortingNetworks/README.md
+++ b/Samples/2_Concepts_and_Techniques/sortingNetworks/README.md
@@ -10,7 +10,7 @@ Data-Parallel Algorithms
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMalloc, cudaFree, cudaDeviceSynchronize, cudaMemcpy
+cudaMalloc, cudaDeviceSynchronize, cudaMemcpy, cudaFree
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/2_Concepts_and_Techniques/sortingNetworks/sortingNetworks_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/sortingNetworks/sortingNetworks_vs2017.vcxproj
index 0012d67a..7c018efb 100644
--- a/Samples/2_Concepts_and_Techniques/sortingNetworks/sortingNetworks_vs2017.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/sortingNetworks/sortingNetworks_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/sortingNetworks.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -111,6 +111,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/sortingNetworks/sortingNetworks_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/sortingNetworks/sortingNetworks_vs2019.vcxproj
index 90052760..3181de72 100644
--- a/Samples/2_Concepts_and_Techniques/sortingNetworks/sortingNetworks_vs2019.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/sortingNetworks/sortingNetworks_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/sortingNetworks.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/sortingNetworks/sortingNetworks_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/sortingNetworks/sortingNetworks_vs2022.vcxproj
index 626d389a..dc83a7b7 100644
--- a/Samples/2_Concepts_and_Techniques/sortingNetworks/sortingNetworks_vs2022.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/sortingNetworks/sortingNetworks_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/sortingNetworks.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/Makefile b/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/Makefile
index c2e55c39..c672f707 100644
--- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/Makefile
+++ b/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/Makefile
@@ -285,9 +285,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/NsightEclipse.xml
index 63511e06..8ca780e9 100644
--- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/NsightEclipse.xml
+++ b/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/NsightEclipse.xml
@@ -3,20 +3,20 @@
 <entry>
   <name>streamOrderedAllocation</name>
   <cuda_api_list>
+    <toolkit>cudaDeviceGetDefaultMemPool</toolkit>
+    <toolkit>cudaFreeAsync</toolkit>
+    <toolkit>cudaStreamCreateWithFlags</toolkit>
     <toolkit>cudaStreamDestroy</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
-    <toolkit>cudaEventSynchronize</toolkit>
     <toolkit>cudaDeviceGetAttribute</toolkit>
     <toolkit>cudaMallocAsync</toolkit>
-    <toolkit>cudaStreamCreateWithFlags</toolkit>
-    <toolkit>cudaDeviceGetDefaultMemPool</toolkit>
     <toolkit>cudaSetDevice</toolkit>
-    <toolkit>cudaMemPoolSetAttribute</toolkit>
+    <toolkit>cudaEventSynchronize</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
     <toolkit>cudaStreamSynchronize</toolkit>
+    <toolkit>cudaMemPoolSetAttribute</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
     <toolkit>cudaMemcpyAsync</toolkit>
-    <toolkit>cudaFreeAsync</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates stream ordered memory allocation on a GPU using cudaMallocAsync and cudaMemPool family of APIs.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -48,6 +48,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/README.md b/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/README.md
index 2eaa6d1a..4af372ec 100644
--- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/README.md
+++ b/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/README.md
@@ -10,7 +10,7 @@ Performance Strategies
 
 ## Supported SM Architectures
 
-[SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaStreamDestroy, cudaEventRecord, cudaEventCreate, cudaEventElapsedTime, cudaEventSynchronize, cudaDeviceGetAttribute, cudaMallocAsync, cudaStreamCreateWithFlags, cudaDeviceGetDefaultMemPool, cudaSetDevice, cudaMemPoolSetAttribute, cudaStreamSynchronize, cudaMemcpyAsync, cudaFreeAsync
+cudaDeviceGetDefaultMemPool, cudaFreeAsync, cudaStreamCreateWithFlags, cudaStreamDestroy, cudaDeviceGetAttribute, cudaMallocAsync, cudaSetDevice, cudaEventSynchronize, cudaEventRecord, cudaStreamSynchronize, cudaMemPoolSetAttribute, cudaEventElapsedTime, cudaMemcpyAsync, cudaEventCreate
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/streamOrderedAllocation_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/streamOrderedAllocation_vs2017.vcxproj
index 376ea1fa..8f4dc7c0 100644
--- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/streamOrderedAllocation_vs2017.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/streamOrderedAllocation_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/streamOrderedAllocation.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/streamOrderedAllocation_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/streamOrderedAllocation_vs2019.vcxproj
index f81e896c..9cd3baae 100644
--- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/streamOrderedAllocation_vs2019.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/streamOrderedAllocation_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/streamOrderedAllocation.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/streamOrderedAllocation_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/streamOrderedAllocation_vs2022.vcxproj
index b61e42f3..6f2d5040 100644
--- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/streamOrderedAllocation_vs2022.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/streamOrderedAllocation_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/streamOrderedAllocation.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/Makefile b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/Makefile
index b71befbe..41845161 100644
--- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/Makefile
+++ b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/Makefile
@@ -305,9 +305,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/NsightEclipse.xml
index d583eaaa..b357595e 100644
--- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/NsightEclipse.xml
+++ b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/NsightEclipse.xml
@@ -8,28 +8,28 @@
   <cuda_api_list>
     <driver>cuDeviceGetAttribute</driver>
     <driver>cuDeviceGet</driver>
-    <toolkit>cudaDeviceEnablePeerAccess</toolkit>
-    <toolkit>cudaMemPoolImportPointer</toolkit>
-    <toolkit>cudaOccupancyMaxActiveBlocksPerMultiprocessor</toolkit>
-    <toolkit>cudaMemPoolDestroy</toolkit>
-    <toolkit>cudaMallocAsync</toolkit>
-    <toolkit>cudaStreamCreateWithFlags</toolkit>
-    <toolkit>cudaDeviceCanAccessPeer</toolkit>
-    <toolkit>cudaMemcpyAsync</toolkit>
-    <toolkit>cudaStreamDestroy</toolkit>
-    <toolkit>cudaSetDevice</toolkit>
-    <toolkit>cudaGetDeviceProperties</toolkit>
-    <toolkit>cudaMemPoolSetAccess</toolkit>
-    <toolkit>cudaGetDeviceCount</toolkit>
     <toolkit>cudaDeviceGetAttribute</toolkit>
-    <toolkit>cudaMemPoolExportPointer</toolkit>
     <toolkit>cudaMemPoolImportFromShareableHandle</toolkit>
-    <toolkit>cudaMemPoolCreate</toolkit>
-    <toolkit>cudaGetLastError</toolkit>
-    <toolkit>cudaStreamSynchronize</toolkit>
+    <toolkit>cudaSetDevice</toolkit>
+    <toolkit>cudaMemPoolExportPointer</toolkit>
     <toolkit>cudaMemPoolGetAccess</toolkit>
-    <toolkit>cudaMemPoolExportToShareableHandle</toolkit>
+    <toolkit>cudaMemPoolDestroy</toolkit>
+    <toolkit>cudaMemPoolSetAccess</toolkit>
+    <toolkit>cudaMallocAsync</toolkit>
+    <toolkit>cudaMemPoolImportPointer</toolkit>
+    <toolkit>cudaGetDeviceCount</toolkit>
+    <toolkit>cudaMemcpyAsync</toolkit>
+    <toolkit>cudaDeviceCanAccessPeer</toolkit>
     <toolkit>cudaFreeAsync</toolkit>
+    <toolkit>cudaStreamCreateWithFlags</toolkit>
+    <toolkit>cudaStreamDestroy</toolkit>
+    <toolkit>cudaGetLastError</toolkit>
+    <toolkit>cudaMemPoolCreate</toolkit>
+    <toolkit>cudaMemPoolExportToShareableHandle</toolkit>
+    <toolkit>cudaStreamSynchronize</toolkit>
+    <toolkit>cudaDeviceEnablePeerAccess</toolkit>
+    <toolkit>cudaOccupancyMaxActiveBlocksPerMultiprocessor</toolkit>
+    <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates IPC pools of stream ordered memory allocated using cudaMallocAsync and cudaMemPool family of APIs.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -63,6 +63,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <sources>
     <extracompilation>../../../Common/helper_multiprocess.cpp</extracompilation>
     <extraheader>../../../Common/helper_multiprocess.h</extraheader>
diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/README.md b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/README.md
index 73a90c5e..b3eff96a 100644
--- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/README.md
+++ b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/README.md
@@ -10,7 +10,7 @@ Performance Strategies
 
 ## Supported SM Architectures
 
-[SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -26,11 +26,11 @@ x86_64
 cuDeviceGetAttribute, cuDeviceGet
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaDeviceEnablePeerAccess, cudaMemPoolImportPointer, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaMemPoolDestroy, cudaMallocAsync, cudaStreamCreateWithFlags, cudaDeviceCanAccessPeer, cudaMemcpyAsync, cudaStreamDestroy, cudaSetDevice, cudaGetDeviceProperties, cudaMemPoolSetAccess, cudaGetDeviceCount, cudaDeviceGetAttribute, cudaMemPoolExportPointer, cudaMemPoolImportFromShareableHandle, cudaMemPoolCreate, cudaGetLastError, cudaStreamSynchronize, cudaMemPoolGetAccess, cudaMemPoolExportToShareableHandle, cudaFreeAsync
+cudaDeviceGetAttribute, cudaMemPoolImportFromShareableHandle, cudaSetDevice, cudaMemPoolExportPointer, cudaMemPoolGetAccess, cudaMemPoolDestroy, cudaMemPoolSetAccess, cudaMallocAsync, cudaMemPoolImportPointer, cudaGetDeviceCount, cudaMemcpyAsync, cudaDeviceCanAccessPeer, cudaFreeAsync, cudaStreamCreateWithFlags, cudaStreamDestroy, cudaGetLastError, cudaMemPoolCreate, cudaMemPoolExportToShareableHandle, cudaStreamSynchronize, cudaDeviceEnablePeerAccess, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaGetDeviceProperties
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/Makefile b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/Makefile
index 792db21e..75bf6386 100644
--- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/Makefile
+++ b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/Makefile
@@ -285,9 +285,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/NsightEclipse.xml
index 3e51c0ff..f6444442 100644
--- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/NsightEclipse.xml
+++ b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/NsightEclipse.xml
@@ -6,21 +6,21 @@
     <flag>--std=c++11</flag>
   </cflags>
   <cuda_api_list>
-    <toolkit>cudaStreamWaitEvent</toolkit>
-    <toolkit>cudaStreamDestroy</toolkit>
-    <toolkit>cudaMemPoolSetAccess</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaGetDeviceCount</toolkit>
-    <toolkit>cudaMallocAsync</toolkit>
-    <toolkit>cudaDeviceGetAttribute</toolkit>
-    <toolkit>cudaStreamCreateWithFlags</toolkit>
-    <toolkit>cudaDeviceCanAccessPeer</toolkit>
     <toolkit>cudaDeviceGetDefaultMemPool</toolkit>
-    <toolkit>cudaSetDevice</toolkit>
-    <toolkit>cudaStreamSynchronize</toolkit>
-    <toolkit>cudaMemcpyAsync</toolkit>
     <toolkit>cudaFreeAsync</toolkit>
+    <toolkit>cudaStreamCreateWithFlags</toolkit>
+    <toolkit>cudaMemPoolSetAccess</toolkit>
+    <toolkit>cudaStreamDestroy</toolkit>
+    <toolkit>cudaDeviceGetAttribute</toolkit>
+    <toolkit>cudaMallocAsync</toolkit>
+    <toolkit>cudaSetDevice</toolkit>
+    <toolkit>cudaGetDeviceCount</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
+    <toolkit>cudaStreamSynchronize</toolkit>
+    <toolkit>cudaStreamWaitEvent</toolkit>
+    <toolkit>cudaMemcpyAsync</toolkit>
+    <toolkit>cudaDeviceCanAccessPeer</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates peer-to-peer access of stream ordered memory allocated using cudaMallocAsync and cudaMemPool family of APIs.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -52,6 +52,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/README.md b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/README.md
index 9ef3fa17..0b2a83d9 100644
--- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/README.md
+++ b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/README.md
@@ -10,7 +10,7 @@ Performance Strategies
 
 ## Supported SM Architectures
 
-[SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaStreamWaitEvent, cudaStreamDestroy, cudaMemPoolSetAccess, cudaEventRecord, cudaEventCreate, cudaGetDeviceCount, cudaMallocAsync, cudaDeviceGetAttribute, cudaStreamCreateWithFlags, cudaDeviceCanAccessPeer, cudaDeviceGetDefaultMemPool, cudaSetDevice, cudaStreamSynchronize, cudaMemcpyAsync, cudaFreeAsync
+cudaDeviceGetDefaultMemPool, cudaFreeAsync, cudaStreamCreateWithFlags, cudaMemPoolSetAccess, cudaStreamDestroy, cudaDeviceGetAttribute, cudaMallocAsync, cudaSetDevice, cudaGetDeviceCount, cudaEventRecord, cudaStreamSynchronize, cudaStreamWaitEvent, cudaMemcpyAsync, cudaDeviceCanAccessPeer, cudaEventCreate
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/streamOrderedAllocationP2P_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/streamOrderedAllocationP2P_vs2017.vcxproj
index 5b9ec320..5ad486fc 100644
--- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/streamOrderedAllocationP2P_vs2017.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/streamOrderedAllocationP2P_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/streamOrderedAllocationP2P.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/streamOrderedAllocationP2P_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/streamOrderedAllocationP2P_vs2019.vcxproj
index 79b1ce85..a7248a6e 100644
--- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/streamOrderedAllocationP2P_vs2019.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/streamOrderedAllocationP2P_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/streamOrderedAllocationP2P.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/streamOrderedAllocationP2P_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/streamOrderedAllocationP2P_vs2022.vcxproj
index 6b468f18..0ea72145 100644
--- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/streamOrderedAllocationP2P_vs2022.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/streamOrderedAllocationP2P_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/streamOrderedAllocationP2P.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/threadFenceReduction/Makefile b/Samples/2_Concepts_and_Techniques/threadFenceReduction/Makefile
index e3f4586b..37b8a9b4 100644
--- a/Samples/2_Concepts_and_Techniques/threadFenceReduction/Makefile
+++ b/Samples/2_Concepts_and_Techniques/threadFenceReduction/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/2_Concepts_and_Techniques/threadFenceReduction/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/threadFenceReduction/NsightEclipse.xml
index e997b187..8436fef3 100644
--- a/Samples/2_Concepts_and_Techniques/threadFenceReduction/NsightEclipse.xml
+++ b/Samples/2_Concepts_and_Techniques/threadFenceReduction/NsightEclipse.xml
@@ -3,10 +3,10 @@
 <entry>
   <name>threadFenceReduction</name>
   <cuda_api_list>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaFree</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample shows how to perform a reduction operation on an array of values using the thread Fence intrinsic to produce a single value in a single kernel (as opposed to two or more kernel calls as shown in the "reduction" CUDA Sample).  Single-pass reduction requires global atomic instructions (Compute Capability 2.0 or later) and the _threadfence() intrinsic (CUDA 2.2 or later).]]></description>
@@ -48,6 +48,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/2_Concepts_and_Techniques/threadFenceReduction/README.md b/Samples/2_Concepts_and_Techniques/threadFenceReduction/README.md
index 4ce80f5c..0156a5f1 100644
--- a/Samples/2_Concepts_and_Techniques/threadFenceReduction/README.md
+++ b/Samples/2_Concepts_and_Techniques/threadFenceReduction/README.md
@@ -10,7 +10,7 @@ Cooperative Groups, Data-Parallel Algorithms, Performance Strategies
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaMemcpy, cudaGetDeviceProperties
+cudaMemcpy, cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaGetDeviceProperties
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/2_Concepts_and_Techniques/threadFenceReduction/threadFenceReduction_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/threadFenceReduction/threadFenceReduction_vs2017.vcxproj
index 4b0cf87e..0d547054 100644
--- a/Samples/2_Concepts_and_Techniques/threadFenceReduction/threadFenceReduction_vs2017.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/threadFenceReduction/threadFenceReduction_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/threadFenceReduction.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/threadFenceReduction/threadFenceReduction_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/threadFenceReduction/threadFenceReduction_vs2019.vcxproj
index b15f148f..ca4df79f 100644
--- a/Samples/2_Concepts_and_Techniques/threadFenceReduction/threadFenceReduction_vs2019.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/threadFenceReduction/threadFenceReduction_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/threadFenceReduction.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/threadFenceReduction/threadFenceReduction_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/threadFenceReduction/threadFenceReduction_vs2022.vcxproj
index 157d11dc..2e5fa2f9 100644
--- a/Samples/2_Concepts_and_Techniques/threadFenceReduction/threadFenceReduction_vs2022.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/threadFenceReduction/threadFenceReduction_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/threadFenceReduction.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/threadMigration/Makefile b/Samples/2_Concepts_and_Techniques/threadMigration/Makefile
index 3acd8454..6c4d542d 100644
--- a/Samples/2_Concepts_and_Techniques/threadMigration/Makefile
+++ b/Samples/2_Concepts_and_Techniques/threadMigration/Makefile
@@ -283,9 +283,9 @@ FATBIN_FILE := threadMigration_kernel${TARGET_SIZE}.fatbin
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(GENCODE_FLAGS),)
diff --git a/Samples/2_Concepts_and_Techniques/threadMigration/README.md b/Samples/2_Concepts_and_Techniques/threadMigration/README.md
index 22a095da..801305ce 100644
--- a/Samples/2_Concepts_and_Techniques/threadMigration/README.md
+++ b/Samples/2_Concepts_and_Techniques/threadMigration/README.md
@@ -10,7 +10,7 @@ CUDA Driver API
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
-cuModuleGetFunction, cuModuleLoadData, cuDeviceGetCount, cuCtxCreate, cuCtxPopCurrent, cuDeviceGetName, cuLaunchKernel, cuMemAlloc, cuMemcpyDtoH, cuModuleUnload, cuCtxPushCurrent, cuDeviceGet, cuMemFree, cuInit, cuCtxDestroy, cuDeviceGetAttribute
+cuMemcpyDtoH, cuLaunchKernel, cuModuleLoadData, cuDeviceGetName, cuDeviceGet, cuDeviceGetAttribute, cuMemAlloc, cuMemFree, cuCtxDestroy, cuCtxPopCurrent, cuModuleUnload, cuDeviceGetCount, cuModuleGetFunction, cuCtxCreate, cuCtxPushCurrent, cuInit
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/2_Concepts_and_Techniques/threadMigration/threadMigration_vs2017.vcxproj b/Samples/2_Concepts_and_Techniques/threadMigration/threadMigration_vs2017.vcxproj
index 8a38efdd..ba53a46a 100644
--- a/Samples/2_Concepts_and_Techniques/threadMigration/threadMigration_vs2017.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/threadMigration/threadMigration_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/threadMigration.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -111,6 +111,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/threadMigration/threadMigration_vs2019.vcxproj b/Samples/2_Concepts_and_Techniques/threadMigration/threadMigration_vs2019.vcxproj
index 05df36d8..6f0f8a90 100644
--- a/Samples/2_Concepts_and_Techniques/threadMigration/threadMigration_vs2019.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/threadMigration/threadMigration_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/threadMigration.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/2_Concepts_and_Techniques/threadMigration/threadMigration_vs2022.vcxproj b/Samples/2_Concepts_and_Techniques/threadMigration/threadMigration_vs2022.vcxproj
index cb4ebcc8..b81a2b07 100644
--- a/Samples/2_Concepts_and_Techniques/threadMigration/threadMigration_vs2022.vcxproj
+++ b/Samples/2_Concepts_and_Techniques/threadMigration/threadMigration_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/threadMigration.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,compute_35;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/StreamPriorities/Makefile b/Samples/3_CUDA_Features/StreamPriorities/Makefile
index 9b1734f5..6faeb198 100644
--- a/Samples/3_CUDA_Features/StreamPriorities/Makefile
+++ b/Samples/3_CUDA_Features/StreamPriorities/Makefile
@@ -297,9 +297,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/3_CUDA_Features/StreamPriorities/NsightEclipse.xml b/Samples/3_CUDA_Features/StreamPriorities/NsightEclipse.xml
index b3e3aab7..0cb59d58 100644
--- a/Samples/3_CUDA_Features/StreamPriorities/NsightEclipse.xml
+++ b/Samples/3_CUDA_Features/StreamPriorities/NsightEclipse.xml
@@ -3,15 +3,15 @@
 <entry>
   <name>StreamPriorities</name>
   <cuda_api_list>
-    <toolkit>cudaDeviceGetStreamPriorityRange</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
-    <toolkit>cudaEventSynchronize</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaStreamCreateWithPriority</toolkit>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaStreamCreateWithPriority</toolkit>
+    <toolkit>cudaDeviceGetStreamPriorityRange</toolkit>
+    <toolkit>cudaEventSynchronize</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates basic use of stream priorities.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -54,6 +54,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/3_CUDA_Features/StreamPriorities/README.md b/Samples/3_CUDA_Features/StreamPriorities/README.md
index 52150773..0b616b49 100644
--- a/Samples/3_CUDA_Features/StreamPriorities/README.md
+++ b/Samples/3_CUDA_Features/StreamPriorities/README.md
@@ -10,7 +10,7 @@ CUDA Streams and Events
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaDeviceGetStreamPriorityRange, cudaEventRecord, cudaEventCreate, cudaEventElapsedTime, cudaEventSynchronize, cudaMalloc, cudaStreamCreateWithPriority, cudaMemcpy, cudaGetDeviceProperties
+cudaMemcpy, cudaStreamCreateWithPriority, cudaDeviceGetStreamPriorityRange, cudaEventSynchronize, cudaEventRecord, cudaMalloc, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate
 
 ## Dependencies needed to build/run
 [Stream-Priorities](../../../README.md#stream-priorities)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/3_CUDA_Features/bf16TensorCoreGemm/Makefile b/Samples/3_CUDA_Features/bf16TensorCoreGemm/Makefile
index 3d8b8dec..29fdcbae 100644
--- a/Samples/3_CUDA_Features/bf16TensorCoreGemm/Makefile
+++ b/Samples/3_CUDA_Features/bf16TensorCoreGemm/Makefile
@@ -322,9 +322,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 80 86 87
+SMS ?= 80 86 87 90
 else
-SMS ?= 80 86
+SMS ?= 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/3_CUDA_Features/bf16TensorCoreGemm/NsightEclipse.xml b/Samples/3_CUDA_Features/bf16TensorCoreGemm/NsightEclipse.xml
index a9313696..1eac8e5c 100644
--- a/Samples/3_CUDA_Features/bf16TensorCoreGemm/NsightEclipse.xml
+++ b/Samples/3_CUDA_Features/bf16TensorCoreGemm/NsightEclipse.xml
@@ -6,18 +6,18 @@
     <flag>--std=c++11</flag>
   </cflags>
   <cuda_api_list>
-    <toolkit>cudaMemset</toolkit>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaFuncSetAttribute</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
-    <toolkit>cudaEventSynchronize</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaGetLastError</toolkit>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaFree</toolkit>
     <toolkit>cudaGetErrorString</toolkit>
+    <toolkit>cudaGetLastError</toolkit>
+    <toolkit>cudaEventSynchronize</toolkit>
+    <toolkit>cudaFuncSetAttribute</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
+    <toolkit>cudaMemset</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[A CUDA sample demonstrating __nv_bfloat16 (e8m7) GEMM computation using the Warp Matrix Multiply and Accumulate (WMMA) API introduced with CUDA 11 in Ampere chip family tensor cores for faster matrix operations. This sample also uses async copy provided by cuda pipeline interface for gmem to shmem async loads which improves kernel performance and reduces register presssure.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -52,6 +52,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/3_CUDA_Features/bf16TensorCoreGemm/README.md b/Samples/3_CUDA_Features/bf16TensorCoreGemm/README.md
index 7a8b729b..30f4eece 100644
--- a/Samples/3_CUDA_Features/bf16TensorCoreGemm/README.md
+++ b/Samples/3_CUDA_Features/bf16TensorCoreGemm/README.md
@@ -10,7 +10,7 @@ Matrix Multiply, WMMA, Tensor Cores
 
 ## Supported SM Architectures
 
-[SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemset, cudaFree, cudaEventRecord, cudaEventCreate, cudaFuncSetAttribute, cudaEventElapsedTime, cudaEventSynchronize, cudaMalloc, cudaGetLastError, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties
+cudaMemcpy, cudaFree, cudaGetErrorString, cudaGetLastError, cudaEventSynchronize, cudaFuncSetAttribute, cudaEventRecord, cudaMemset, cudaMalloc, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate
 
 ## Dependencies needed to build/run
 [CPP11](../../../README.md#cpp11)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/3_CUDA_Features/bf16TensorCoreGemm/bf16TensorCoreGemm_vs2017.vcxproj b/Samples/3_CUDA_Features/bf16TensorCoreGemm/bf16TensorCoreGemm_vs2017.vcxproj
index bfab31dc..b8cb9fb2 100644
--- a/Samples/3_CUDA_Features/bf16TensorCoreGemm/bf16TensorCoreGemm_vs2017.vcxproj
+++ b/Samples/3_CUDA_Features/bf16TensorCoreGemm/bf16TensorCoreGemm_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/bf16TensorCoreGemm.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/bf16TensorCoreGemm/bf16TensorCoreGemm_vs2019.vcxproj b/Samples/3_CUDA_Features/bf16TensorCoreGemm/bf16TensorCoreGemm_vs2019.vcxproj
index 4240024c..c2c1f920 100644
--- a/Samples/3_CUDA_Features/bf16TensorCoreGemm/bf16TensorCoreGemm_vs2019.vcxproj
+++ b/Samples/3_CUDA_Features/bf16TensorCoreGemm/bf16TensorCoreGemm_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/bf16TensorCoreGemm.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/bf16TensorCoreGemm/bf16TensorCoreGemm_vs2022.vcxproj b/Samples/3_CUDA_Features/bf16TensorCoreGemm/bf16TensorCoreGemm_vs2022.vcxproj
index ecbfc6d5..774b45bf 100644
--- a/Samples/3_CUDA_Features/bf16TensorCoreGemm/bf16TensorCoreGemm_vs2022.vcxproj
+++ b/Samples/3_CUDA_Features/bf16TensorCoreGemm/bf16TensorCoreGemm_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/bf16TensorCoreGemm.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/binaryPartitionCG/Makefile b/Samples/3_CUDA_Features/binaryPartitionCG/Makefile
index d1bdbc6d..ace0b3ae 100644
--- a/Samples/3_CUDA_Features/binaryPartitionCG/Makefile
+++ b/Samples/3_CUDA_Features/binaryPartitionCG/Makefile
@@ -310,9 +310,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/3_CUDA_Features/binaryPartitionCG/NsightEclipse.xml b/Samples/3_CUDA_Features/binaryPartitionCG/NsightEclipse.xml
index b611d7c1..96982217 100644
--- a/Samples/3_CUDA_Features/binaryPartitionCG/NsightEclipse.xml
+++ b/Samples/3_CUDA_Features/binaryPartitionCG/NsightEclipse.xml
@@ -6,15 +6,15 @@
     <flag>--std=c++11</flag>
   </cflags>
   <cuda_api_list>
+    <toolkit>cudaStreamCreateWithFlags</toolkit>
     <toolkit>cudaFree</toolkit>
     <toolkit>cudaMallocHost</toolkit>
-    <toolkit>cudaOccupancyMaxPotentialBlockSize</toolkit>
-    <toolkit>cudaMemsetAsync</toolkit>
     <toolkit>cudaFreeHost</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaStreamCreateWithFlags</toolkit>
     <toolkit>cudaStreamSynchronize</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaMemsetAsync</toolkit>
     <toolkit>cudaMemcpyAsync</toolkit>
+    <toolkit>cudaOccupancyMaxPotentialBlockSize</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample is a simple code that illustrates binary partition cooperative groups and reduce within the thread block.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -54,6 +54,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/3_CUDA_Features/binaryPartitionCG/README.md b/Samples/3_CUDA_Features/binaryPartitionCG/README.md
index c40ff6bd..d2c29682 100644
--- a/Samples/3_CUDA_Features/binaryPartitionCG/README.md
+++ b/Samples/3_CUDA_Features/binaryPartitionCG/README.md
@@ -10,7 +10,7 @@ Cooperative Groups
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaMallocHost, cudaOccupancyMaxPotentialBlockSize, cudaMemsetAsync, cudaFreeHost, cudaMalloc, cudaStreamCreateWithFlags, cudaStreamSynchronize, cudaMemcpyAsync
+cudaStreamCreateWithFlags, cudaFree, cudaMallocHost, cudaFreeHost, cudaStreamSynchronize, cudaMalloc, cudaMemsetAsync, cudaMemcpyAsync, cudaOccupancyMaxPotentialBlockSize
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/3_CUDA_Features/binaryPartitionCG/binaryPartitionCG_vs2017.vcxproj b/Samples/3_CUDA_Features/binaryPartitionCG/binaryPartitionCG_vs2017.vcxproj
index cd3fccf0..be9569bb 100644
--- a/Samples/3_CUDA_Features/binaryPartitionCG/binaryPartitionCG_vs2017.vcxproj
+++ b/Samples/3_CUDA_Features/binaryPartitionCG/binaryPartitionCG_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/binaryPartitionCG.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/binaryPartitionCG/binaryPartitionCG_vs2019.vcxproj b/Samples/3_CUDA_Features/binaryPartitionCG/binaryPartitionCG_vs2019.vcxproj
index 5b7320ce..c3c28362 100644
--- a/Samples/3_CUDA_Features/binaryPartitionCG/binaryPartitionCG_vs2019.vcxproj
+++ b/Samples/3_CUDA_Features/binaryPartitionCG/binaryPartitionCG_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/binaryPartitionCG.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/binaryPartitionCG/binaryPartitionCG_vs2022.vcxproj b/Samples/3_CUDA_Features/binaryPartitionCG/binaryPartitionCG_vs2022.vcxproj
index df9ea16f..594d4bd3 100644
--- a/Samples/3_CUDA_Features/binaryPartitionCG/binaryPartitionCG_vs2022.vcxproj
+++ b/Samples/3_CUDA_Features/binaryPartitionCG/binaryPartitionCG_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/binaryPartitionCG.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/bindlessTexture/Makefile b/Samples/3_CUDA_Features/bindlessTexture/Makefile
index 4310ee3e..9e9c3369 100644
--- a/Samples/3_CUDA_Features/bindlessTexture/Makefile
+++ b/Samples/3_CUDA_Features/bindlessTexture/Makefile
@@ -299,9 +299,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/3_CUDA_Features/bindlessTexture/NsightEclipse.xml b/Samples/3_CUDA_Features/bindlessTexture/NsightEclipse.xml
index 247f1255..25b63ad8 100644
--- a/Samples/3_CUDA_Features/bindlessTexture/NsightEclipse.xml
+++ b/Samples/3_CUDA_Features/bindlessTexture/NsightEclipse.xml
@@ -3,28 +3,28 @@
 <entry>
   <name>bindlessTexture</name>
   <cuda_api_list>
-    <toolkit>cudaGraphicsMapResources</toolkit>
-    <toolkit>cudaGetMipmappedArrayLevel</toolkit>
-    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
-    <toolkit>cudaArrayGetInfo</toolkit>
     <toolkit>cudaMemcpy</toolkit>
-    <toolkit>cudaFreeMipmappedArray</toolkit>
+    <toolkit>cudaGetMipmappedArrayLevel</toolkit>
+    <toolkit>cudaGraphicsMapResources</toolkit>
     <toolkit>cudaDestroySurfaceObject</toolkit>
-    <toolkit>cudaPitchedPtr</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaGraphicsUnregisterResource</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaDestroyTextureObject</toolkit>
-    <toolkit>cudaGetLastError</toolkit>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaFreeArray</toolkit>
-    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
     <toolkit>cudaExtent</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaCreateSurfaceObject</toolkit>
     <toolkit>cudaMallocMipmappedArray</toolkit>
+    <toolkit>cudaPitchedPtr</toolkit>
+    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
     <toolkit>cudaCreateTextureObject</toolkit>
-    <toolkit>cudaMallocArray</toolkit>
     <toolkit>cudaGraphicsUnmapResources</toolkit>
+    <toolkit>cudaMallocArray</toolkit>
+    <toolkit>cudaFreeArray</toolkit>
+    <toolkit>cudaArrayGetInfo</toolkit>
+    <toolkit>cudaGetLastError</toolkit>
+    <toolkit>cudaDestroyTextureObject</toolkit>
+    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
+    <toolkit>cudaFreeMipmappedArray</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaGraphicsUnregisterResource</toolkit>
+    <toolkit>cudaMalloc</toolkit>
   </cuda_api_list>
   <description><![CDATA[This example demonstrates use of cudaSurfaceObject, cudaTextureObject, and MipMap support in CUDA.  A GPU with Compute Capability SM 3.0 is required to run the sample.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -85,6 +85,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/3_CUDA_Features/bindlessTexture/README.md b/Samples/3_CUDA_Features/bindlessTexture/README.md
index 4047f08a..cf14ba1a 100644
--- a/Samples/3_CUDA_Features/bindlessTexture/README.md
+++ b/Samples/3_CUDA_Features/bindlessTexture/README.md
@@ -10,7 +10,7 @@ Graphics Interop, Texture
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaGraphicsMapResources, cudaGetMipmappedArrayLevel, cudaGraphicsResourceGetMappedPointer, cudaArrayGetInfo, cudaMemcpy, cudaFreeMipmappedArray, cudaDestroySurfaceObject, cudaPitchedPtr, cudaMalloc, cudaGraphicsUnregisterResource, cudaDeviceSynchronize, cudaDestroyTextureObject, cudaGetLastError, cudaFree, cudaFreeArray, cudaGraphicsGLRegisterBuffer, cudaExtent, cudaCreateSurfaceObject, cudaMallocMipmappedArray, cudaCreateTextureObject, cudaMallocArray, cudaGraphicsUnmapResources
+cudaMemcpy, cudaGetMipmappedArrayLevel, cudaGraphicsMapResources, cudaDestroySurfaceObject, cudaExtent, cudaDeviceSynchronize, cudaCreateSurfaceObject, cudaMallocMipmappedArray, cudaPitchedPtr, cudaGraphicsResourceGetMappedPointer, cudaCreateTextureObject, cudaGraphicsUnmapResources, cudaMallocArray, cudaFreeArray, cudaArrayGetInfo, cudaGetLastError, cudaDestroyTextureObject, cudaGraphicsGLRegisterBuffer, cudaFreeMipmappedArray, cudaFree, cudaGraphicsUnregisterResource, cudaMalloc
 
 ## Dependencies needed to build/run
 [X11](../../../README.md#x11), [GL](../../../README.md#gl)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/3_CUDA_Features/bindlessTexture/bindlessTexture_vs2017.vcxproj b/Samples/3_CUDA_Features/bindlessTexture/bindlessTexture_vs2017.vcxproj
index c7434315..bcc1990a 100644
--- a/Samples/3_CUDA_Features/bindlessTexture/bindlessTexture_vs2017.vcxproj
+++ b/Samples/3_CUDA_Features/bindlessTexture/bindlessTexture_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/bindlessTexture.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -118,6 +118,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/bindlessTexture/bindlessTexture_vs2019.vcxproj b/Samples/3_CUDA_Features/bindlessTexture/bindlessTexture_vs2019.vcxproj
index d18b1ac4..110d990f 100644
--- a/Samples/3_CUDA_Features/bindlessTexture/bindlessTexture_vs2019.vcxproj
+++ b/Samples/3_CUDA_Features/bindlessTexture/bindlessTexture_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/bindlessTexture.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -114,6 +114,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/bindlessTexture/bindlessTexture_vs2022.vcxproj b/Samples/3_CUDA_Features/bindlessTexture/bindlessTexture_vs2022.vcxproj
index e08edee0..f9bcc8ae 100644
--- a/Samples/3_CUDA_Features/bindlessTexture/bindlessTexture_vs2022.vcxproj
+++ b/Samples/3_CUDA_Features/bindlessTexture/bindlessTexture_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/bindlessTexture.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -114,6 +114,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/bindlessTexture/findgllib.mk b/Samples/3_CUDA_Features/bindlessTexture/findgllib.mk
index f0a5c551..998fcf0f 100644
--- a/Samples/3_CUDA_Features/bindlessTexture/findgllib.mk
+++ b/Samples/3_CUDA_Features/bindlessTexture/findgllib.mk
@@ -53,11 +53,12 @@ endif
 ifeq ("$(TARGET_OS)","linux")
     # $(info) >> findgllib.mk -> LINUX path <<<)
     # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside
-    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu      >/dev/null 2>&1; echo $$?)
-    FEDORA = $(shell echo $(DISTRO) | grep -i fedora      >/dev/null 2>&1; echo $$?)
-    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
-    CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
+    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu       >/dev/null 2>&1; echo $$?)
+    FEDORA = $(shell echo $(DISTRO) | grep -i fedora       >/dev/null 2>&1; echo $$?)
+    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel'  >/dev/null 2>&1; echo $$?)
+    CENTOS = $(shell echo $(DISTRO) | grep -i centos       >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux")
         DFLT_PATH ?= /usr/lib
       endif
     endif
+
     ifeq ("$(SUSE)","0")
       GLPATH    ?= /usr/X11R6/lib64
       GLLINK    ?= -L/usr/X11R6/lib64
       DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(FEDORA)","0")
+    else 
       GLPATH    ?= /usr/lib64/nvidia
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
-    ifeq ("$(RHEL)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(CENTOS)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-  
+      
   # find libGL, libGLU 
   GLLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so  -print 2>/dev/null)
   GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null)
diff --git a/Samples/3_CUDA_Features/cdpAdvancedQuicksort/Makefile b/Samples/3_CUDA_Features/cdpAdvancedQuicksort/Makefile
index 85498be3..5eab1414 100644
--- a/Samples/3_CUDA_Features/cdpAdvancedQuicksort/Makefile
+++ b/Samples/3_CUDA_Features/cdpAdvancedQuicksort/Makefile
@@ -310,9 +310,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 61 70 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/3_CUDA_Features/cdpAdvancedQuicksort/NsightEclipse.xml b/Samples/3_CUDA_Features/cdpAdvancedQuicksort/NsightEclipse.xml
index 714aab28..383410d3 100644
--- a/Samples/3_CUDA_Features/cdpAdvancedQuicksort/NsightEclipse.xml
+++ b/Samples/3_CUDA_Features/cdpAdvancedQuicksort/NsightEclipse.xml
@@ -8,20 +8,20 @@
     <flag>--std=c++14</flag>
   </cflags>
   <cuda_api_list>
-    <toolkit>cudaMemset</toolkit>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaPeekAtLastError</toolkit>
-    <toolkit>cudaMalloc</toolkit>
     <toolkit>cudaStreamCreateWithFlags</toolkit>
-    <toolkit>cudaGetLastError</toolkit>
-    <toolkit>cudaMemcpyAsync</toolkit>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaMemcpyAsync</toolkit>
+    <toolkit>cudaFree</toolkit>
     <toolkit>cudaGetErrorString</toolkit>
+    <toolkit>cudaGetLastError</toolkit>
+    <toolkit>cudaPeekAtLastError</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
+    <toolkit>cudaMemset</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates an advanced quicksort implemented using CUDA Dynamic Parallelism.  This sample requires devices with compute capability 3.5 or higher.]]></description>
   <includepaths>
@@ -63,6 +63,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/3_CUDA_Features/cdpAdvancedQuicksort/README.md b/Samples/3_CUDA_Features/cdpAdvancedQuicksort/README.md
index b3f33419..1314b89b 100644
--- a/Samples/3_CUDA_Features/cdpAdvancedQuicksort/README.md
+++ b/Samples/3_CUDA_Features/cdpAdvancedQuicksort/README.md
@@ -10,7 +10,7 @@ Cooperative Groups, CUDA Dynamic Parallelism
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemset, cudaFree, cudaEventRecord, cudaEventCreate, cudaEventElapsedTime, cudaDeviceSynchronize, cudaPeekAtLastError, cudaMalloc, cudaStreamCreateWithFlags, cudaGetLastError, cudaMemcpyAsync, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties
+cudaStreamCreateWithFlags, cudaMemcpy, cudaMemcpyAsync, cudaFree, cudaGetErrorString, cudaGetLastError, cudaPeekAtLastError, cudaDeviceSynchronize, cudaEventRecord, cudaMemset, cudaMalloc, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate
 
 ## Dependencies needed to build/run
 [CDP](../../../README.md#cdp)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/3_CUDA_Features/cdpAdvancedQuicksort/cdpAdvancedQuicksort_vs2017.vcxproj b/Samples/3_CUDA_Features/cdpAdvancedQuicksort/cdpAdvancedQuicksort_vs2017.vcxproj
index 6244cf4f..5c76d1c7 100644
--- a/Samples/3_CUDA_Features/cdpAdvancedQuicksort/cdpAdvancedQuicksort_vs2017.vcxproj
+++ b/Samples/3_CUDA_Features/cdpAdvancedQuicksort/cdpAdvancedQuicksort_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/cdpAdvancedQuicksort.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -109,6 +109,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/cdpAdvancedQuicksort/cdpAdvancedQuicksort_vs2019.vcxproj b/Samples/3_CUDA_Features/cdpAdvancedQuicksort/cdpAdvancedQuicksort_vs2019.vcxproj
index 68071f8a..4779bb51 100644
--- a/Samples/3_CUDA_Features/cdpAdvancedQuicksort/cdpAdvancedQuicksort_vs2019.vcxproj
+++ b/Samples/3_CUDA_Features/cdpAdvancedQuicksort/cdpAdvancedQuicksort_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/cdpAdvancedQuicksort.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/cdpAdvancedQuicksort/cdpAdvancedQuicksort_vs2022.vcxproj b/Samples/3_CUDA_Features/cdpAdvancedQuicksort/cdpAdvancedQuicksort_vs2022.vcxproj
index 462ed63c..39da5407 100644
--- a/Samples/3_CUDA_Features/cdpAdvancedQuicksort/cdpAdvancedQuicksort_vs2022.vcxproj
+++ b/Samples/3_CUDA_Features/cdpAdvancedQuicksort/cdpAdvancedQuicksort_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/cdpAdvancedQuicksort.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/cdpBezierTessellation/Makefile b/Samples/3_CUDA_Features/cdpBezierTessellation/Makefile
index 4f89e84d..23ef29e8 100644
--- a/Samples/3_CUDA_Features/cdpBezierTessellation/Makefile
+++ b/Samples/3_CUDA_Features/cdpBezierTessellation/Makefile
@@ -285,9 +285,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 61 70 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/3_CUDA_Features/cdpBezierTessellation/NsightEclipse.xml b/Samples/3_CUDA_Features/cdpBezierTessellation/NsightEclipse.xml
index 5566265e..26392f7b 100644
--- a/Samples/3_CUDA_Features/cdpBezierTessellation/NsightEclipse.xml
+++ b/Samples/3_CUDA_Features/cdpBezierTessellation/NsightEclipse.xml
@@ -6,10 +6,10 @@
     <flag>-dc</flag>
   </cflags>
   <cuda_api_list>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaFree</toolkit>
     <toolkit>cudaGetDeviceCount</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates bezier tessellation of lines implemented using CUDA Dynamic Parallelism.  This sample requires devices with compute capability 3.5 or higher.]]></description>
@@ -50,6 +50,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/3_CUDA_Features/cdpBezierTessellation/README.md b/Samples/3_CUDA_Features/cdpBezierTessellation/README.md
index 30ba3375..bb2d6e63 100644
--- a/Samples/3_CUDA_Features/cdpBezierTessellation/README.md
+++ b/Samples/3_CUDA_Features/cdpBezierTessellation/README.md
@@ -10,7 +10,7 @@ CUDA Dynamic Parallelism
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaGetDeviceCount, cudaMalloc, cudaMemcpy, cudaGetDeviceProperties
+cudaMemcpy, cudaFree, cudaGetDeviceCount, cudaMalloc, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [CDP](../../../README.md#cdp)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/3_CUDA_Features/cdpBezierTessellation/cdpBezierTessellation_vs2017.vcxproj b/Samples/3_CUDA_Features/cdpBezierTessellation/cdpBezierTessellation_vs2017.vcxproj
index 6436a8ad..e7733bf8 100644
--- a/Samples/3_CUDA_Features/cdpBezierTessellation/cdpBezierTessellation_vs2017.vcxproj
+++ b/Samples/3_CUDA_Features/cdpBezierTessellation/cdpBezierTessellation_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/cdpBezierTessellation.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/cdpBezierTessellation/cdpBezierTessellation_vs2019.vcxproj b/Samples/3_CUDA_Features/cdpBezierTessellation/cdpBezierTessellation_vs2019.vcxproj
index 1452f331..aa2fefe0 100644
--- a/Samples/3_CUDA_Features/cdpBezierTessellation/cdpBezierTessellation_vs2019.vcxproj
+++ b/Samples/3_CUDA_Features/cdpBezierTessellation/cdpBezierTessellation_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/cdpBezierTessellation.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/cdpBezierTessellation/cdpBezierTessellation_vs2022.vcxproj b/Samples/3_CUDA_Features/cdpBezierTessellation/cdpBezierTessellation_vs2022.vcxproj
index b1eb0d98..577c5e19 100644
--- a/Samples/3_CUDA_Features/cdpBezierTessellation/cdpBezierTessellation_vs2022.vcxproj
+++ b/Samples/3_CUDA_Features/cdpBezierTessellation/cdpBezierTessellation_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/cdpBezierTessellation.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/cdpQuadtree/Makefile b/Samples/3_CUDA_Features/cdpQuadtree/Makefile
index fc39f0e0..9b4e08f8 100644
--- a/Samples/3_CUDA_Features/cdpQuadtree/Makefile
+++ b/Samples/3_CUDA_Features/cdpQuadtree/Makefile
@@ -310,9 +310,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 61 70 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/3_CUDA_Features/cdpQuadtree/NsightEclipse.xml b/Samples/3_CUDA_Features/cdpQuadtree/NsightEclipse.xml
index 63b02230..6d867f7f 100644
--- a/Samples/3_CUDA_Features/cdpQuadtree/NsightEclipse.xml
+++ b/Samples/3_CUDA_Features/cdpQuadtree/NsightEclipse.xml
@@ -7,11 +7,11 @@
     <flag>--std=c++14</flag>
   </cflags>
   <cuda_api_list>
-    <toolkit>cudaDeviceSetLimit</toolkit>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaGetLastError</toolkit>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaGetLastError</toolkit>
+    <toolkit>cudaDeviceSetLimit</toolkit>
+    <toolkit>cudaMalloc</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates Quad Trees implemented using CUDA Dynamic Parallelism. This sample requires devices with compute capability 3.5 or higher.]]></description>
@@ -54,6 +54,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/3_CUDA_Features/cdpQuadtree/README.md b/Samples/3_CUDA_Features/cdpQuadtree/README.md
index a170e47e..dc96c3c5 100644
--- a/Samples/3_CUDA_Features/cdpQuadtree/README.md
+++ b/Samples/3_CUDA_Features/cdpQuadtree/README.md
@@ -10,7 +10,7 @@ Cooperative Groups, CUDA Dynamic Parallelism
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaDeviceSetLimit, cudaFree, cudaMalloc, cudaGetLastError, cudaMemcpy, cudaGetDeviceProperties
+cudaMemcpy, cudaFree, cudaGetLastError, cudaDeviceSetLimit, cudaMalloc, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [CDP](../../../README.md#cdp)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/3_CUDA_Features/cdpQuadtree/cdpQuadtree_vs2017.vcxproj b/Samples/3_CUDA_Features/cdpQuadtree/cdpQuadtree_vs2017.vcxproj
index b0cd8376..15110cd3 100644
--- a/Samples/3_CUDA_Features/cdpQuadtree/cdpQuadtree_vs2017.vcxproj
+++ b/Samples/3_CUDA_Features/cdpQuadtree/cdpQuadtree_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/cdpQuadtree.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/cdpQuadtree/cdpQuadtree_vs2019.vcxproj b/Samples/3_CUDA_Features/cdpQuadtree/cdpQuadtree_vs2019.vcxproj
index 99bfdc1d..3ec1b136 100644
--- a/Samples/3_CUDA_Features/cdpQuadtree/cdpQuadtree_vs2019.vcxproj
+++ b/Samples/3_CUDA_Features/cdpQuadtree/cdpQuadtree_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/cdpQuadtree.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/cdpQuadtree/cdpQuadtree_vs2022.vcxproj b/Samples/3_CUDA_Features/cdpQuadtree/cdpQuadtree_vs2022.vcxproj
index d2e08299..181408c3 100644
--- a/Samples/3_CUDA_Features/cdpQuadtree/cdpQuadtree_vs2022.vcxproj
+++ b/Samples/3_CUDA_Features/cdpQuadtree/cdpQuadtree_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/cdpQuadtree.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/cdpSimplePrint/Makefile b/Samples/3_CUDA_Features/cdpSimplePrint/Makefile
index d64d69da..1ea64428 100644
--- a/Samples/3_CUDA_Features/cdpSimplePrint/Makefile
+++ b/Samples/3_CUDA_Features/cdpSimplePrint/Makefile
@@ -285,9 +285,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 61 70 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/3_CUDA_Features/cdpSimplePrint/NsightEclipse.xml b/Samples/3_CUDA_Features/cdpSimplePrint/NsightEclipse.xml
index d1d5fd2f..cfe32d19 100644
--- a/Samples/3_CUDA_Features/cdpSimplePrint/NsightEclipse.xml
+++ b/Samples/3_CUDA_Features/cdpSimplePrint/NsightEclipse.xml
@@ -6,10 +6,10 @@
     <flag>-dc</flag>
   </cflags>
   <cuda_api_list>
-    <toolkit>cudaDeviceSetLimit</toolkit>
-    <toolkit>cudaGetLastError</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaGetLastError</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaDeviceSetLimit</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates simple printf implemented using CUDA Dynamic Parallelism.  This sample requires devices with compute capability 3.5 or higher.]]></description>
   <includepaths>
@@ -51,6 +51,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/3_CUDA_Features/cdpSimplePrint/README.md b/Samples/3_CUDA_Features/cdpSimplePrint/README.md
index ab868271..c872f4bb 100644
--- a/Samples/3_CUDA_Features/cdpSimplePrint/README.md
+++ b/Samples/3_CUDA_Features/cdpSimplePrint/README.md
@@ -10,7 +10,7 @@ CUDA Dynamic Parallelism
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaDeviceSetLimit, cudaGetLastError, cudaDeviceSynchronize, cudaGetDeviceProperties
+cudaDeviceSynchronize, cudaGetLastError, cudaGetDeviceProperties, cudaDeviceSetLimit
 
 ## Dependencies needed to build/run
 [CDP](../../../README.md#cdp)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/3_CUDA_Features/cdpSimplePrint/cdpSimplePrint_vs2017.vcxproj b/Samples/3_CUDA_Features/cdpSimplePrint/cdpSimplePrint_vs2017.vcxproj
index 63502de3..b2613487 100644
--- a/Samples/3_CUDA_Features/cdpSimplePrint/cdpSimplePrint_vs2017.vcxproj
+++ b/Samples/3_CUDA_Features/cdpSimplePrint/cdpSimplePrint_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/cdpSimplePrint.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/cdpSimplePrint/cdpSimplePrint_vs2019.vcxproj b/Samples/3_CUDA_Features/cdpSimplePrint/cdpSimplePrint_vs2019.vcxproj
index d8719248..ae105c0a 100644
--- a/Samples/3_CUDA_Features/cdpSimplePrint/cdpSimplePrint_vs2019.vcxproj
+++ b/Samples/3_CUDA_Features/cdpSimplePrint/cdpSimplePrint_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/cdpSimplePrint.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/cdpSimplePrint/cdpSimplePrint_vs2022.vcxproj b/Samples/3_CUDA_Features/cdpSimplePrint/cdpSimplePrint_vs2022.vcxproj
index bc59636b..7f1b73f1 100644
--- a/Samples/3_CUDA_Features/cdpSimplePrint/cdpSimplePrint_vs2022.vcxproj
+++ b/Samples/3_CUDA_Features/cdpSimplePrint/cdpSimplePrint_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/cdpSimplePrint.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/cdpSimpleQuicksort/Makefile b/Samples/3_CUDA_Features/cdpSimpleQuicksort/Makefile
index 5771acad..1bf519d5 100644
--- a/Samples/3_CUDA_Features/cdpSimpleQuicksort/Makefile
+++ b/Samples/3_CUDA_Features/cdpSimpleQuicksort/Makefile
@@ -285,9 +285,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 61 70 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/3_CUDA_Features/cdpSimpleQuicksort/NsightEclipse.xml b/Samples/3_CUDA_Features/cdpSimpleQuicksort/NsightEclipse.xml
index 82714619..9c196081 100644
--- a/Samples/3_CUDA_Features/cdpSimpleQuicksort/NsightEclipse.xml
+++ b/Samples/3_CUDA_Features/cdpSimpleQuicksort/NsightEclipse.xml
@@ -6,13 +6,13 @@
     <flag>-dc</flag>
   </cflags>
   <cuda_api_list>
-    <toolkit>cudaDeviceSetLimit</toolkit>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaStreamDestroy</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaMalloc</toolkit>
     <toolkit>cudaStreamCreateWithFlags</toolkit>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaStreamDestroy</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaDeviceSetLimit</toolkit>
+    <toolkit>cudaMalloc</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates simple quicksort implemented using CUDA Dynamic Parallelism.  This sample requires devices with compute capability 3.5 or higher.]]></description>
@@ -55,6 +55,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/3_CUDA_Features/cdpSimpleQuicksort/README.md b/Samples/3_CUDA_Features/cdpSimpleQuicksort/README.md
index 0805d380..5a765c0a 100644
--- a/Samples/3_CUDA_Features/cdpSimpleQuicksort/README.md
+++ b/Samples/3_CUDA_Features/cdpSimpleQuicksort/README.md
@@ -10,7 +10,7 @@ CUDA Dynamic Parallelism
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaDeviceSetLimit, cudaFree, cudaStreamDestroy, cudaDeviceSynchronize, cudaMalloc, cudaStreamCreateWithFlags, cudaMemcpy, cudaGetDeviceProperties
+cudaStreamCreateWithFlags, cudaMemcpy, cudaStreamDestroy, cudaFree, cudaDeviceSynchronize, cudaDeviceSetLimit, cudaMalloc, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [CDP](../../../README.md#cdp)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/3_CUDA_Features/cdpSimpleQuicksort/cdpSimpleQuicksort_vs2017.vcxproj b/Samples/3_CUDA_Features/cdpSimpleQuicksort/cdpSimpleQuicksort_vs2017.vcxproj
index 1f396df6..701ef7df 100644
--- a/Samples/3_CUDA_Features/cdpSimpleQuicksort/cdpSimpleQuicksort_vs2017.vcxproj
+++ b/Samples/3_CUDA_Features/cdpSimpleQuicksort/cdpSimpleQuicksort_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/cdpSimpleQuicksort.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/cdpSimpleQuicksort/cdpSimpleQuicksort_vs2019.vcxproj b/Samples/3_CUDA_Features/cdpSimpleQuicksort/cdpSimpleQuicksort_vs2019.vcxproj
index e926d0d1..be79af0a 100644
--- a/Samples/3_CUDA_Features/cdpSimpleQuicksort/cdpSimpleQuicksort_vs2019.vcxproj
+++ b/Samples/3_CUDA_Features/cdpSimpleQuicksort/cdpSimpleQuicksort_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/cdpSimpleQuicksort.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/cdpSimpleQuicksort/cdpSimpleQuicksort_vs2022.vcxproj b/Samples/3_CUDA_Features/cdpSimpleQuicksort/cdpSimpleQuicksort_vs2022.vcxproj
index bf24f8f3..601d665a 100644
--- a/Samples/3_CUDA_Features/cdpSimpleQuicksort/cdpSimpleQuicksort_vs2022.vcxproj
+++ b/Samples/3_CUDA_Features/cdpSimpleQuicksort/cdpSimpleQuicksort_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/cdpSimpleQuicksort.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/cudaCompressibleMemory/Makefile b/Samples/3_CUDA_Features/cudaCompressibleMemory/Makefile
index 7118ad8c..9d7f9adf 100644
--- a/Samples/3_CUDA_Features/cudaCompressibleMemory/Makefile
+++ b/Samples/3_CUDA_Features/cudaCompressibleMemory/Makefile
@@ -293,9 +293,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/3_CUDA_Features/cudaCompressibleMemory/README.md b/Samples/3_CUDA_Features/cudaCompressibleMemory/README.md
index e95e4904..cd28b6f2 100644
--- a/Samples/3_CUDA_Features/cudaCompressibleMemory/README.md
+++ b/Samples/3_CUDA_Features/cudaCompressibleMemory/README.md
@@ -10,7 +10,7 @@ CUDA Driver API, Compressible Memory, MMAP
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
-cuMemRelease, cuCtxGetDevice, cuMemGetAllocationPropertiesFromHandle, cuMemSetAccess, cuMemMap, cuMemCreate, cuMemAddressFree, cuMemGetAllocationGranularity, cuMemUnmap, cuMemAddressReserve, cuDeviceGetAttribute
+cuMemGetAllocationPropertiesFromHandle, cuMemCreate, cuDeviceGetAttribute, cuCtxGetDevice, cuMemGetAllocationGranularity, cuMemAddressFree, cuMemUnmap, cuMemMap, cuMemRelease, cuMemAddressReserve, cuMemSetAccess
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaEventRecord, cudaEventCreate, cudaOccupancyMaxPotentialBlockSize, cudaEventElapsedTime, cudaEventSynchronize, cudaMemcpy
+cudaMemcpy, cudaEventSynchronize, cudaEventRecord, cudaEventElapsedTime, cudaOccupancyMaxPotentialBlockSize, cudaEventCreate
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/3_CUDA_Features/cudaCompressibleMemory/cudaCompressibleMemory_vs2017.vcxproj b/Samples/3_CUDA_Features/cudaCompressibleMemory/cudaCompressibleMemory_vs2017.vcxproj
index cb21616c..5fd82a2d 100644
--- a/Samples/3_CUDA_Features/cudaCompressibleMemory/cudaCompressibleMemory_vs2017.vcxproj
+++ b/Samples/3_CUDA_Features/cudaCompressibleMemory/cudaCompressibleMemory_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/cudaCompressibleMemory.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/cudaCompressibleMemory/cudaCompressibleMemory_vs2019.vcxproj b/Samples/3_CUDA_Features/cudaCompressibleMemory/cudaCompressibleMemory_vs2019.vcxproj
index f4763a3e..f0138278 100644
--- a/Samples/3_CUDA_Features/cudaCompressibleMemory/cudaCompressibleMemory_vs2019.vcxproj
+++ b/Samples/3_CUDA_Features/cudaCompressibleMemory/cudaCompressibleMemory_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/cudaCompressibleMemory.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/cudaCompressibleMemory/cudaCompressibleMemory_vs2022.vcxproj b/Samples/3_CUDA_Features/cudaCompressibleMemory/cudaCompressibleMemory_vs2022.vcxproj
index 1a116a85..5c5b989c 100644
--- a/Samples/3_CUDA_Features/cudaCompressibleMemory/cudaCompressibleMemory_vs2022.vcxproj
+++ b/Samples/3_CUDA_Features/cudaCompressibleMemory/cudaCompressibleMemory_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/cudaCompressibleMemory.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/cudaTensorCoreGemm/Makefile b/Samples/3_CUDA_Features/cudaTensorCoreGemm/Makefile
index cd5ed03c..a949034f 100644
--- a/Samples/3_CUDA_Features/cudaTensorCoreGemm/Makefile
+++ b/Samples/3_CUDA_Features/cudaTensorCoreGemm/Makefile
@@ -291,9 +291,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 70 72 75 80 86 87
+SMS ?= 70 72 75 80 86 87 90
 else
-SMS ?= 70 75 80 86
+SMS ?= 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/3_CUDA_Features/cudaTensorCoreGemm/NsightEclipse.xml b/Samples/3_CUDA_Features/cudaTensorCoreGemm/NsightEclipse.xml
index 25dc757d..e4306062 100644
--- a/Samples/3_CUDA_Features/cudaTensorCoreGemm/NsightEclipse.xml
+++ b/Samples/3_CUDA_Features/cudaTensorCoreGemm/NsightEclipse.xml
@@ -6,18 +6,18 @@
     <flag>-maxrregcount=255</flag>
   </cflags>
   <cuda_api_list>
-    <toolkit>cudaMemset</toolkit>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaFuncSetAttribute</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
-    <toolkit>cudaEventSynchronize</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaGetLastError</toolkit>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaFree</toolkit>
     <toolkit>cudaGetErrorString</toolkit>
+    <toolkit>cudaGetLastError</toolkit>
+    <toolkit>cudaEventSynchronize</toolkit>
+    <toolkit>cudaFuncSetAttribute</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
+    <toolkit>cudaMemset</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[CUDA sample demonstrating a GEMM computation using the Warp Matrix Multiply and Accumulate (WMMA) API introduced in CUDA 9.
 
@@ -52,6 +52,7 @@ In addition to that, it demonstrates the use of the new CUDA function attribute
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/3_CUDA_Features/cudaTensorCoreGemm/README.md b/Samples/3_CUDA_Features/cudaTensorCoreGemm/README.md
index 5c7a02ca..ed9ca03e 100644
--- a/Samples/3_CUDA_Features/cudaTensorCoreGemm/README.md
+++ b/Samples/3_CUDA_Features/cudaTensorCoreGemm/README.md
@@ -14,7 +14,7 @@ Matrix Multiply, WMMA, Tensor Cores
 
 ## Supported SM Architectures
 
-[SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -27,11 +27,11 @@ x86_64, ppc64le, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemset, cudaFree, cudaEventRecord, cudaEventCreate, cudaFuncSetAttribute, cudaEventElapsedTime, cudaEventSynchronize, cudaMalloc, cudaGetLastError, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties
+cudaMemcpy, cudaFree, cudaGetErrorString, cudaGetLastError, cudaEventSynchronize, cudaFuncSetAttribute, cudaEventRecord, cudaMemset, cudaMalloc, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm_vs2017.vcxproj b/Samples/3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm_vs2017.vcxproj
index 6f38472d..a5854708 100644
--- a/Samples/3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm_vs2017.vcxproj
+++ b/Samples/3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/cudaTensorCoreGemm.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm_vs2019.vcxproj b/Samples/3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm_vs2019.vcxproj
index 32c37bb4..5a7700ba 100644
--- a/Samples/3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm_vs2019.vcxproj
+++ b/Samples/3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/cudaTensorCoreGemm.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm_vs2022.vcxproj b/Samples/3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm_vs2022.vcxproj
index 24801f67..3a4f102a 100644
--- a/Samples/3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm_vs2022.vcxproj
+++ b/Samples/3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/cudaTensorCoreGemm.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/dmmaTensorCoreGemm/Makefile b/Samples/3_CUDA_Features/dmmaTensorCoreGemm/Makefile
index e8ed96d9..a8731ad2 100644
--- a/Samples/3_CUDA_Features/dmmaTensorCoreGemm/Makefile
+++ b/Samples/3_CUDA_Features/dmmaTensorCoreGemm/Makefile
@@ -322,9 +322,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 80 86 87
+SMS ?= 80 86 87 90
 else
-SMS ?= 80 86
+SMS ?= 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/3_CUDA_Features/dmmaTensorCoreGemm/NsightEclipse.xml b/Samples/3_CUDA_Features/dmmaTensorCoreGemm/NsightEclipse.xml
index 2cd65814..f28f86b3 100644
--- a/Samples/3_CUDA_Features/dmmaTensorCoreGemm/NsightEclipse.xml
+++ b/Samples/3_CUDA_Features/dmmaTensorCoreGemm/NsightEclipse.xml
@@ -6,18 +6,18 @@
     <flag>--std=c++11</flag>
   </cflags>
   <cuda_api_list>
-    <toolkit>cudaMemset</toolkit>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaFuncSetAttribute</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
-    <toolkit>cudaEventSynchronize</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaGetLastError</toolkit>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaFree</toolkit>
     <toolkit>cudaGetErrorString</toolkit>
+    <toolkit>cudaGetLastError</toolkit>
+    <toolkit>cudaEventSynchronize</toolkit>
+    <toolkit>cudaFuncSetAttribute</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
+    <toolkit>cudaMemset</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[CUDA sample demonstrates double precision GEMM computation using the Double precision Warp Matrix Multiply and Accumulate (WMMA) API introduced with CUDA 11 in Ampere chip family tensor cores for faster matrix operations. This sample also uses async copy provided by cuda pipeline interface for gmem to shmem async loads which improves kernel performance and reduces register presssure. Further, this sample also demonstrates how to use cooperative groups async copy interface over a group for performing gmem to shmem async loads.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -52,6 +52,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/3_CUDA_Features/dmmaTensorCoreGemm/README.md b/Samples/3_CUDA_Features/dmmaTensorCoreGemm/README.md
index 13b8e8e4..8699aa21 100644
--- a/Samples/3_CUDA_Features/dmmaTensorCoreGemm/README.md
+++ b/Samples/3_CUDA_Features/dmmaTensorCoreGemm/README.md
@@ -10,7 +10,7 @@ Matrix Multiply, WMMA, Tensor Cores
 
 ## Supported SM Architectures
 
-[SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemset, cudaFree, cudaEventRecord, cudaEventCreate, cudaFuncSetAttribute, cudaEventElapsedTime, cudaEventSynchronize, cudaMalloc, cudaGetLastError, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties
+cudaMemcpy, cudaFree, cudaGetErrorString, cudaGetLastError, cudaEventSynchronize, cudaFuncSetAttribute, cudaEventRecord, cudaMemset, cudaMalloc, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate
 
 ## Dependencies needed to build/run
 [CPP11](../../../README.md#cpp11)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/3_CUDA_Features/dmmaTensorCoreGemm/dmmaTensorCoreGemm_vs2017.vcxproj b/Samples/3_CUDA_Features/dmmaTensorCoreGemm/dmmaTensorCoreGemm_vs2017.vcxproj
index 8e82e63e..1dbcff5e 100644
--- a/Samples/3_CUDA_Features/dmmaTensorCoreGemm/dmmaTensorCoreGemm_vs2017.vcxproj
+++ b/Samples/3_CUDA_Features/dmmaTensorCoreGemm/dmmaTensorCoreGemm_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/dmmaTensorCoreGemm.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/dmmaTensorCoreGemm/dmmaTensorCoreGemm_vs2019.vcxproj b/Samples/3_CUDA_Features/dmmaTensorCoreGemm/dmmaTensorCoreGemm_vs2019.vcxproj
index 7c6849bd..0f024a26 100644
--- a/Samples/3_CUDA_Features/dmmaTensorCoreGemm/dmmaTensorCoreGemm_vs2019.vcxproj
+++ b/Samples/3_CUDA_Features/dmmaTensorCoreGemm/dmmaTensorCoreGemm_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/dmmaTensorCoreGemm.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/dmmaTensorCoreGemm/dmmaTensorCoreGemm_vs2022.vcxproj b/Samples/3_CUDA_Features/dmmaTensorCoreGemm/dmmaTensorCoreGemm_vs2022.vcxproj
index 480cc0b8..6dcc0232 100644
--- a/Samples/3_CUDA_Features/dmmaTensorCoreGemm/dmmaTensorCoreGemm_vs2022.vcxproj
+++ b/Samples/3_CUDA_Features/dmmaTensorCoreGemm/dmmaTensorCoreGemm_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/dmmaTensorCoreGemm.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/globalToShmemAsyncCopy/Makefile b/Samples/3_CUDA_Features/globalToShmemAsyncCopy/Makefile
index 7a7fd2fa..6fdd9aab 100644
--- a/Samples/3_CUDA_Features/globalToShmemAsyncCopy/Makefile
+++ b/Samples/3_CUDA_Features/globalToShmemAsyncCopy/Makefile
@@ -310,9 +310,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 70 72 75 80 86 87
+SMS ?= 70 72 75 80 86 87 90
 else
-SMS ?= 70 75 80 86
+SMS ?= 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/3_CUDA_Features/globalToShmemAsyncCopy/NsightEclipse.xml b/Samples/3_CUDA_Features/globalToShmemAsyncCopy/NsightEclipse.xml
index 60eb92c8..83bc6d5c 100644
--- a/Samples/3_CUDA_Features/globalToShmemAsyncCopy/NsightEclipse.xml
+++ b/Samples/3_CUDA_Features/globalToShmemAsyncCopy/NsightEclipse.xml
@@ -6,20 +6,20 @@
     <flag>--std=c++11</flag>
   </cflags>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
-    <toolkit>cudaMallocHost</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaMemsetAsync</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
-    <toolkit>cudaEventSynchronize</toolkit>
-    <toolkit>cudaDeviceGetAttribute</toolkit>
-    <toolkit>cudaFreeHost</toolkit>
-    <toolkit>cudaMalloc</toolkit>
     <toolkit>cudaStreamCreateWithFlags</toolkit>
-    <toolkit>cudaEventDestroy</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaDeviceGetAttribute</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaMallocHost</toolkit>
+    <toolkit>cudaEventSynchronize</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
+    <toolkit>cudaFreeHost</toolkit>
     <toolkit>cudaStreamSynchronize</toolkit>
+    <toolkit>cudaEventDestroy</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
+    <toolkit>cudaMemsetAsync</toolkit>
     <toolkit>cudaMemcpyAsync</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample implements matrix multiplication which uses asynchronous copy of data from global to shared memory when on compute capability 8.0 or higher. Also demonstrates arrive-wait barrier for synchronization.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -59,6 +59,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/3_CUDA_Features/globalToShmemAsyncCopy/README.md b/Samples/3_CUDA_Features/globalToShmemAsyncCopy/README.md
index b5adb76f..a2d7d6a3 100644
--- a/Samples/3_CUDA_Features/globalToShmemAsyncCopy/README.md
+++ b/Samples/3_CUDA_Features/globalToShmemAsyncCopy/README.md
@@ -10,7 +10,7 @@ CUDA Runtime API, Linear Algebra, CPP11 CUDA
 
 ## Supported SM Architectures
 
-[SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaEventRecord, cudaMallocHost, cudaEventCreate, cudaMemsetAsync, cudaEventElapsedTime, cudaEventSynchronize, cudaDeviceGetAttribute, cudaFreeHost, cudaMalloc, cudaStreamCreateWithFlags, cudaEventDestroy, cudaStreamSynchronize, cudaMemcpyAsync
+cudaStreamCreateWithFlags, cudaMalloc, cudaDeviceGetAttribute, cudaFree, cudaMallocHost, cudaEventSynchronize, cudaEventRecord, cudaFreeHost, cudaStreamSynchronize, cudaEventDestroy, cudaEventElapsedTime, cudaMemsetAsync, cudaMemcpyAsync, cudaEventCreate
 
 ## Dependencies needed to build/run
 [CPP11](../../../README.md#cpp11)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/3_CUDA_Features/globalToShmemAsyncCopy/globalToShmemAsyncCopy_vs2017.vcxproj b/Samples/3_CUDA_Features/globalToShmemAsyncCopy/globalToShmemAsyncCopy_vs2017.vcxproj
index 100834d7..41bedad1 100644
--- a/Samples/3_CUDA_Features/globalToShmemAsyncCopy/globalToShmemAsyncCopy_vs2017.vcxproj
+++ b/Samples/3_CUDA_Features/globalToShmemAsyncCopy/globalToShmemAsyncCopy_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/globalToShmemAsyncCopy.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/globalToShmemAsyncCopy/globalToShmemAsyncCopy_vs2019.vcxproj b/Samples/3_CUDA_Features/globalToShmemAsyncCopy/globalToShmemAsyncCopy_vs2019.vcxproj
index 5f1c0721..7db3f231 100644
--- a/Samples/3_CUDA_Features/globalToShmemAsyncCopy/globalToShmemAsyncCopy_vs2019.vcxproj
+++ b/Samples/3_CUDA_Features/globalToShmemAsyncCopy/globalToShmemAsyncCopy_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/globalToShmemAsyncCopy.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/globalToShmemAsyncCopy/globalToShmemAsyncCopy_vs2022.vcxproj b/Samples/3_CUDA_Features/globalToShmemAsyncCopy/globalToShmemAsyncCopy_vs2022.vcxproj
index 2b82306f..5351130d 100644
--- a/Samples/3_CUDA_Features/globalToShmemAsyncCopy/globalToShmemAsyncCopy_vs2022.vcxproj
+++ b/Samples/3_CUDA_Features/globalToShmemAsyncCopy/globalToShmemAsyncCopy_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/globalToShmemAsyncCopy.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/graphMemoryFootprint/Makefile b/Samples/3_CUDA_Features/graphMemoryFootprint/Makefile
index f4647da1..0233718b 100644
--- a/Samples/3_CUDA_Features/graphMemoryFootprint/Makefile
+++ b/Samples/3_CUDA_Features/graphMemoryFootprint/Makefile
@@ -285,9 +285,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/3_CUDA_Features/graphMemoryFootprint/NsightEclipse.xml b/Samples/3_CUDA_Features/graphMemoryFootprint/NsightEclipse.xml
index 72be4775..7f7d842b 100644
--- a/Samples/3_CUDA_Features/graphMemoryFootprint/NsightEclipse.xml
+++ b/Samples/3_CUDA_Features/graphMemoryFootprint/NsightEclipse.xml
@@ -3,23 +3,23 @@
 <entry>
   <name>graphMemoryFootprint</name>
   <cuda_api_list>
-    <toolkit>cudaGraphAddMemFreeNode</toolkit>
-    <toolkit>cudaStreamDestroy</toolkit>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaGraphExecDestroy</toolkit>
-    <toolkit>cudaGraphInstantiate</toolkit>
-    <toolkit>cudaDeviceGetAttribute</toolkit>
-    <toolkit>cudaDriverGetVersion</toolkit>
-    <toolkit>cudaGraphCreate</toolkit>
-    <toolkit>cudaGraphAddKernelNode</toolkit>
     <toolkit>cudaGraphAddMemAllocNode</toolkit>
     <toolkit>cudaStreamCreateWithFlags</toolkit>
-    <toolkit>cudaDeviceGraphMemTrim</toolkit>
-    <toolkit>cudaStreamSynchronize</toolkit>
+    <toolkit>cudaGraphInstantiate</toolkit>
+    <toolkit>cudaStreamDestroy</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaDeviceGetAttribute</toolkit>
+    <toolkit>cudaGraphAddKernelNode</toolkit>
+    <toolkit>cudaGraphAddMemFreeNode</toolkit>
     <toolkit>cudaDeviceGetGraphMemAttribute</toolkit>
+    <toolkit>cudaGraphCreate</toolkit>
     <toolkit>cudaGraphDestroy</toolkit>
-    <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaDriverGetVersion</toolkit>
     <toolkit>cudaGraphLaunch</toolkit>
+    <toolkit>cudaStreamSynchronize</toolkit>
+    <toolkit>cudaDeviceGraphMemTrim</toolkit>
+    <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaGraphExecDestroy</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates how graph memory nodes re-use virtual addresses and physical memory.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -56,6 +56,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/3_CUDA_Features/graphMemoryFootprint/README.md b/Samples/3_CUDA_Features/graphMemoryFootprint/README.md
index 76c04cf7..6286fa0d 100644
--- a/Samples/3_CUDA_Features/graphMemoryFootprint/README.md
+++ b/Samples/3_CUDA_Features/graphMemoryFootprint/README.md
@@ -10,7 +10,7 @@ CUDA Runtime API, Performance Strategies, CUDA Graphs
 
 ## Supported SM Architectures
 
-[SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaGraphAddMemFreeNode, cudaStreamDestroy, cudaFree, cudaGraphExecDestroy, cudaGraphInstantiate, cudaDeviceGetAttribute, cudaDriverGetVersion, cudaGraphCreate, cudaGraphAddKernelNode, cudaGraphAddMemAllocNode, cudaStreamCreateWithFlags, cudaDeviceGraphMemTrim, cudaStreamSynchronize, cudaDeviceGetGraphMemAttribute, cudaGraphDestroy, cudaGetDeviceProperties, cudaGraphLaunch
+cudaGraphAddMemAllocNode, cudaStreamCreateWithFlags, cudaGraphInstantiate, cudaStreamDestroy, cudaFree, cudaDeviceGetAttribute, cudaGraphAddKernelNode, cudaGraphAddMemFreeNode, cudaDeviceGetGraphMemAttribute, cudaGraphCreate, cudaGraphDestroy, cudaDriverGetVersion, cudaGraphLaunch, cudaStreamSynchronize, cudaDeviceGraphMemTrim, cudaGetDeviceProperties, cudaGraphExecDestroy
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/3_CUDA_Features/graphMemoryFootprint/graphMemoryFootprint_vs2017.vcxproj b/Samples/3_CUDA_Features/graphMemoryFootprint/graphMemoryFootprint_vs2017.vcxproj
index 412492cb..e65d0b66 100644
--- a/Samples/3_CUDA_Features/graphMemoryFootprint/graphMemoryFootprint_vs2017.vcxproj
+++ b/Samples/3_CUDA_Features/graphMemoryFootprint/graphMemoryFootprint_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/graphMemoryFootprint.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/graphMemoryFootprint/graphMemoryFootprint_vs2019.vcxproj b/Samples/3_CUDA_Features/graphMemoryFootprint/graphMemoryFootprint_vs2019.vcxproj
index 28c07316..82b98142 100644
--- a/Samples/3_CUDA_Features/graphMemoryFootprint/graphMemoryFootprint_vs2019.vcxproj
+++ b/Samples/3_CUDA_Features/graphMemoryFootprint/graphMemoryFootprint_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/graphMemoryFootprint.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/graphMemoryFootprint/graphMemoryFootprint_vs2022.vcxproj b/Samples/3_CUDA_Features/graphMemoryFootprint/graphMemoryFootprint_vs2022.vcxproj
index 5eaef34b..cbf3dd31 100644
--- a/Samples/3_CUDA_Features/graphMemoryFootprint/graphMemoryFootprint_vs2022.vcxproj
+++ b/Samples/3_CUDA_Features/graphMemoryFootprint/graphMemoryFootprint_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/graphMemoryFootprint.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/graphMemoryNodes/Makefile b/Samples/3_CUDA_Features/graphMemoryNodes/Makefile
index a233d14b..b760fc44 100644
--- a/Samples/3_CUDA_Features/graphMemoryNodes/Makefile
+++ b/Samples/3_CUDA_Features/graphMemoryNodes/Makefile
@@ -285,9 +285,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/3_CUDA_Features/graphMemoryNodes/NsightEclipse.xml b/Samples/3_CUDA_Features/graphMemoryNodes/NsightEclipse.xml
index 5fd9f688..a2cc3608 100644
--- a/Samples/3_CUDA_Features/graphMemoryNodes/NsightEclipse.xml
+++ b/Samples/3_CUDA_Features/graphMemoryNodes/NsightEclipse.xml
@@ -3,32 +3,32 @@
 <entry>
   <name>graphMemoryNodes</name>
   <cuda_api_list>
-    <toolkit>cudaMallocAsync</toolkit>
-    <toolkit>cudaStreamCreateWithFlags</toolkit>
     <toolkit>cudaMemcpy</toolkit>
-    <toolkit>cudaMemcpyAsync</toolkit>
-    <toolkit>cudaStreamDestroy</toolkit>
-    <toolkit>cudaMallocManaged</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaDriverGetVersion</toolkit>
-    <toolkit>cudaGraphCreate</toolkit>
-    <toolkit>cudaGraphAddMemAllocNode</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaEventDestroy</toolkit>
-    <toolkit>cudaStreamEndCapture</toolkit>
-    <toolkit>cudaGraphExecDestroy</toolkit>
-    <toolkit>cudaStreamBeginCapture</toolkit>
     <toolkit>cudaDeviceGetAttribute</toolkit>
-    <toolkit>cudaStreamSynchronize</toolkit>
-    <toolkit>cudaGraphDestroy</toolkit>
+    <toolkit>cudaDriverGetVersion</toolkit>
     <toolkit>cudaGraphLaunch</toolkit>
-    <toolkit>cudaGraphAddMemFreeNode</toolkit>
-    <toolkit>cudaStreamWaitEvent</toolkit>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
-    <toolkit>cudaGraphInstantiate</toolkit>
-    <toolkit>cudaGraphAddKernelNode</toolkit>
+    <toolkit>cudaEventDestroy</toolkit>
+    <toolkit>cudaMallocAsync</toolkit>
+    <toolkit>cudaStreamEndCapture</toolkit>
+    <toolkit>cudaMallocManaged</toolkit>
+    <toolkit>cudaGraphCreate</toolkit>
+    <toolkit>cudaMemcpyAsync</toolkit>
     <toolkit>cudaFreeAsync</toolkit>
+    <toolkit>cudaStreamCreateWithFlags</toolkit>
+    <toolkit>cudaGraphInstantiate</toolkit>
+    <toolkit>cudaStreamDestroy</toolkit>
+    <toolkit>cudaStreamBeginCapture</toolkit>
+    <toolkit>cudaStreamWaitEvent</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
+    <toolkit>cudaGraphAddMemAllocNode</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaGraphAddKernelNode</toolkit>
+    <toolkit>cudaGraphAddMemFreeNode</toolkit>
+    <toolkit>cudaGraphDestroy</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
+    <toolkit>cudaStreamSynchronize</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaGraphExecDestroy</toolkit>
   </cuda_api_list>
   <description><![CDATA[A demonstration of memory allocations and frees within CUDA graphs using Graph APIs and Stream Capture APIs.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -67,6 +67,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/3_CUDA_Features/graphMemoryNodes/README.md b/Samples/3_CUDA_Features/graphMemoryNodes/README.md
index f3e934e1..7bf467a4 100644
--- a/Samples/3_CUDA_Features/graphMemoryNodes/README.md
+++ b/Samples/3_CUDA_Features/graphMemoryNodes/README.md
@@ -10,7 +10,7 @@ CUDA Graphs, Stream Capture
 
 ## Supported SM Architectures
 
-[SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMallocAsync, cudaStreamCreateWithFlags, cudaMemcpy, cudaMemcpyAsync, cudaStreamDestroy, cudaMallocManaged, cudaEventCreate, cudaDriverGetVersion, cudaGraphCreate, cudaGraphAddMemAllocNode, cudaMalloc, cudaEventDestroy, cudaStreamEndCapture, cudaGraphExecDestroy, cudaStreamBeginCapture, cudaDeviceGetAttribute, cudaStreamSynchronize, cudaGraphDestroy, cudaGraphLaunch, cudaGraphAddMemFreeNode, cudaStreamWaitEvent, cudaFree, cudaEventRecord, cudaGraphInstantiate, cudaGraphAddKernelNode, cudaFreeAsync
+cudaMemcpy, cudaDeviceGetAttribute, cudaDriverGetVersion, cudaGraphLaunch, cudaEventDestroy, cudaMallocAsync, cudaStreamEndCapture, cudaMallocManaged, cudaGraphCreate, cudaMemcpyAsync, cudaFreeAsync, cudaStreamCreateWithFlags, cudaGraphInstantiate, cudaStreamDestroy, cudaStreamBeginCapture, cudaStreamWaitEvent, cudaEventCreate, cudaGraphAddMemAllocNode, cudaFree, cudaGraphAddKernelNode, cudaGraphAddMemFreeNode, cudaGraphDestroy, cudaEventRecord, cudaStreamSynchronize, cudaMalloc, cudaGraphExecDestroy
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/3_CUDA_Features/graphMemoryNodes/graphMemoryNodes_vs2017.vcxproj b/Samples/3_CUDA_Features/graphMemoryNodes/graphMemoryNodes_vs2017.vcxproj
index 54629b89..f025d778 100644
--- a/Samples/3_CUDA_Features/graphMemoryNodes/graphMemoryNodes_vs2017.vcxproj
+++ b/Samples/3_CUDA_Features/graphMemoryNodes/graphMemoryNodes_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/graphMemoryNodes.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/graphMemoryNodes/graphMemoryNodes_vs2019.vcxproj b/Samples/3_CUDA_Features/graphMemoryNodes/graphMemoryNodes_vs2019.vcxproj
index ce007363..df298580 100644
--- a/Samples/3_CUDA_Features/graphMemoryNodes/graphMemoryNodes_vs2019.vcxproj
+++ b/Samples/3_CUDA_Features/graphMemoryNodes/graphMemoryNodes_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/graphMemoryNodes.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/graphMemoryNodes/graphMemoryNodes_vs2022.vcxproj b/Samples/3_CUDA_Features/graphMemoryNodes/graphMemoryNodes_vs2022.vcxproj
index 70d59c50..5f123dc9 100644
--- a/Samples/3_CUDA_Features/graphMemoryNodes/graphMemoryNodes_vs2022.vcxproj
+++ b/Samples/3_CUDA_Features/graphMemoryNodes/graphMemoryNodes_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/graphMemoryNodes.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/immaTensorCoreGemm/Makefile b/Samples/3_CUDA_Features/immaTensorCoreGemm/Makefile
index 0018823d..840e9399 100644
--- a/Samples/3_CUDA_Features/immaTensorCoreGemm/Makefile
+++ b/Samples/3_CUDA_Features/immaTensorCoreGemm/Makefile
@@ -291,9 +291,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 72 75 80 86 87
+SMS ?= 72 75 80 86 87 90
 else
-SMS ?= 75 80 86
+SMS ?= 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/3_CUDA_Features/immaTensorCoreGemm/NsightEclipse.xml b/Samples/3_CUDA_Features/immaTensorCoreGemm/NsightEclipse.xml
index ce92f2df..35a48fe6 100644
--- a/Samples/3_CUDA_Features/immaTensorCoreGemm/NsightEclipse.xml
+++ b/Samples/3_CUDA_Features/immaTensorCoreGemm/NsightEclipse.xml
@@ -6,18 +6,18 @@
     <flag>-maxrregcount=255</flag>
   </cflags>
   <cuda_api_list>
-    <toolkit>cudaMemset</toolkit>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaFuncSetAttribute</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
-    <toolkit>cudaEventSynchronize</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaGetLastError</toolkit>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaFree</toolkit>
     <toolkit>cudaGetErrorString</toolkit>
+    <toolkit>cudaGetLastError</toolkit>
+    <toolkit>cudaEventSynchronize</toolkit>
+    <toolkit>cudaFuncSetAttribute</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
+    <toolkit>cudaMemset</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[CUDA sample demonstrating a integer GEMM computation using the Warp Matrix Multiply and Accumulate (WMMA) API for integer introduced in CUDA 10. This sample demonstrates the use of the CUDA WMMA API employing the Tensor Cores introduced in the Volta chip family for faster matrix operations. In addition to that, it demonstrates the use of the new CUDA function attribute cudaFuncAttributeMaxDynamicSharedMemorySize that allows the application to reserve an extended amount of shared memory than it is available by default.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -47,6 +47,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/3_CUDA_Features/immaTensorCoreGemm/README.md b/Samples/3_CUDA_Features/immaTensorCoreGemm/README.md
index 61b40710..db9d4802 100644
--- a/Samples/3_CUDA_Features/immaTensorCoreGemm/README.md
+++ b/Samples/3_CUDA_Features/immaTensorCoreGemm/README.md
@@ -10,7 +10,7 @@ Matrix Multiply, WMMA, Tensor Cores
 
 ## Supported SM Architectures
 
-[SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemset, cudaFree, cudaEventRecord, cudaEventCreate, cudaFuncSetAttribute, cudaEventElapsedTime, cudaEventSynchronize, cudaMalloc, cudaGetLastError, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties
+cudaMemcpy, cudaFree, cudaGetErrorString, cudaGetLastError, cudaEventSynchronize, cudaFuncSetAttribute, cudaEventRecord, cudaMemset, cudaMalloc, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/3_CUDA_Features/immaTensorCoreGemm/immaTensorCoreGemm_vs2017.vcxproj b/Samples/3_CUDA_Features/immaTensorCoreGemm/immaTensorCoreGemm_vs2017.vcxproj
index 79b1a319..b48ad38f 100644
--- a/Samples/3_CUDA_Features/immaTensorCoreGemm/immaTensorCoreGemm_vs2017.vcxproj
+++ b/Samples/3_CUDA_Features/immaTensorCoreGemm/immaTensorCoreGemm_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/immaTensorCoreGemm.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/immaTensorCoreGemm/immaTensorCoreGemm_vs2019.vcxproj b/Samples/3_CUDA_Features/immaTensorCoreGemm/immaTensorCoreGemm_vs2019.vcxproj
index 9351844d..b5931f57 100644
--- a/Samples/3_CUDA_Features/immaTensorCoreGemm/immaTensorCoreGemm_vs2019.vcxproj
+++ b/Samples/3_CUDA_Features/immaTensorCoreGemm/immaTensorCoreGemm_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/immaTensorCoreGemm.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/immaTensorCoreGemm/immaTensorCoreGemm_vs2022.vcxproj b/Samples/3_CUDA_Features/immaTensorCoreGemm/immaTensorCoreGemm_vs2022.vcxproj
index 17495837..90ae3390 100644
--- a/Samples/3_CUDA_Features/immaTensorCoreGemm/immaTensorCoreGemm_vs2022.vcxproj
+++ b/Samples/3_CUDA_Features/immaTensorCoreGemm/immaTensorCoreGemm_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/immaTensorCoreGemm.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/jacobiCudaGraphs/Makefile b/Samples/3_CUDA_Features/jacobiCudaGraphs/Makefile
index 35fadaa5..fa42ed87 100644
--- a/Samples/3_CUDA_Features/jacobiCudaGraphs/Makefile
+++ b/Samples/3_CUDA_Features/jacobiCudaGraphs/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/3_CUDA_Features/jacobiCudaGraphs/NsightEclipse.xml b/Samples/3_CUDA_Features/jacobiCudaGraphs/NsightEclipse.xml
index a752218c..e1572b43 100644
--- a/Samples/3_CUDA_Features/jacobiCudaGraphs/NsightEclipse.xml
+++ b/Samples/3_CUDA_Features/jacobiCudaGraphs/NsightEclipse.xml
@@ -3,28 +3,28 @@
 <entry>
   <name>jacobiCudaGraphs</name>
   <cuda_api_list>
-    <toolkit>cudaGraphAddMemsetNode</toolkit>
-    <toolkit>cudaStreamCreateWithFlags</toolkit>
-    <toolkit>cudaMemcpyAsync</toolkit>
+    <toolkit>cudaExtent</toolkit>
+    <toolkit>cudaGraphLaunch</toolkit>
+    <toolkit>cudaGraphAddMemcpyNode</toolkit>
     <toolkit>cudaMallocHost</toolkit>
     <toolkit>cudaPitchedPtr</toolkit>
-    <toolkit>cudaGraphCreate</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaPos</toolkit>
-    <toolkit>cudaGraphAddMemcpyNode</toolkit>
     <toolkit>cudaStreamEndCapture</toolkit>
-    <toolkit>cudaGraphExecDestroy</toolkit>
-    <toolkit>cudaStreamBeginCapture</toolkit>
-    <toolkit>cudaGraphExecKernelNodeSetParams</toolkit>
-    <toolkit>cudaStreamSynchronize</toolkit>
-    <toolkit>cudaGraphLaunch</toolkit>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaGraphInstantiate</toolkit>
-    <toolkit>cudaExtent</toolkit>
-    <toolkit>cudaMemsetAsync</toolkit>
+    <toolkit>cudaGraphCreate</toolkit>
     <toolkit>cudaFreeHost</toolkit>
-    <toolkit>cudaGraphAddKernelNode</toolkit>
+    <toolkit>cudaMemsetAsync</toolkit>
+    <toolkit>cudaMemcpyAsync</toolkit>
+    <toolkit>cudaGraphExecKernelNodeSetParams</toolkit>
+    <toolkit>cudaStreamCreateWithFlags</toolkit>
+    <toolkit>cudaGraphInstantiate</toolkit>
+    <toolkit>cudaStreamBeginCapture</toolkit>
+    <toolkit>cudaFree</toolkit>
     <toolkit>cudaGraphExecUpdate</toolkit>
+    <toolkit>cudaGraphAddKernelNode</toolkit>
+    <toolkit>cudaPos</toolkit>
+    <toolkit>cudaStreamSynchronize</toolkit>
+    <toolkit>cudaGraphAddMemsetNode</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaGraphExecDestroy</toolkit>
   </cuda_api_list>
   <description><![CDATA[Demonstrates Instantiated CUDA Graph Update with Jacobi Iterative Method using cudaGraphExecKernelNodeSetParams() and cudaGraphExecUpdate() approach.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -66,6 +66,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/3_CUDA_Features/jacobiCudaGraphs/README.md b/Samples/3_CUDA_Features/jacobiCudaGraphs/README.md
index b9d76e69..68722187 100644
--- a/Samples/3_CUDA_Features/jacobiCudaGraphs/README.md
+++ b/Samples/3_CUDA_Features/jacobiCudaGraphs/README.md
@@ -10,7 +10,7 @@ CUDA Graphs, Stream Capture, Instantiated CUDA Graph Update, Cooperative Groups
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaGraphAddMemsetNode, cudaStreamCreateWithFlags, cudaMemcpyAsync, cudaMallocHost, cudaPitchedPtr, cudaGraphCreate, cudaMalloc, cudaPos, cudaGraphAddMemcpyNode, cudaStreamEndCapture, cudaGraphExecDestroy, cudaStreamBeginCapture, cudaGraphExecKernelNodeSetParams, cudaStreamSynchronize, cudaGraphLaunch, cudaFree, cudaGraphInstantiate, cudaExtent, cudaMemsetAsync, cudaFreeHost, cudaGraphAddKernelNode, cudaGraphExecUpdate
+cudaExtent, cudaGraphLaunch, cudaGraphAddMemcpyNode, cudaMallocHost, cudaPitchedPtr, cudaStreamEndCapture, cudaGraphCreate, cudaFreeHost, cudaMemsetAsync, cudaMemcpyAsync, cudaGraphExecKernelNodeSetParams, cudaStreamCreateWithFlags, cudaGraphInstantiate, cudaStreamBeginCapture, cudaFree, cudaGraphExecUpdate, cudaGraphAddKernelNode, cudaPos, cudaStreamSynchronize, cudaGraphAddMemsetNode, cudaMalloc, cudaGraphExecDestroy
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/3_CUDA_Features/jacobiCudaGraphs/jacobiCudaGraphs_vs2017.vcxproj b/Samples/3_CUDA_Features/jacobiCudaGraphs/jacobiCudaGraphs_vs2017.vcxproj
index e8a5153b..489735bc 100644
--- a/Samples/3_CUDA_Features/jacobiCudaGraphs/jacobiCudaGraphs_vs2017.vcxproj
+++ b/Samples/3_CUDA_Features/jacobiCudaGraphs/jacobiCudaGraphs_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/jacobiCudaGraphs.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/jacobiCudaGraphs/jacobiCudaGraphs_vs2019.vcxproj b/Samples/3_CUDA_Features/jacobiCudaGraphs/jacobiCudaGraphs_vs2019.vcxproj
index be846454..b6440eb1 100644
--- a/Samples/3_CUDA_Features/jacobiCudaGraphs/jacobiCudaGraphs_vs2019.vcxproj
+++ b/Samples/3_CUDA_Features/jacobiCudaGraphs/jacobiCudaGraphs_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/jacobiCudaGraphs.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/jacobiCudaGraphs/jacobiCudaGraphs_vs2022.vcxproj b/Samples/3_CUDA_Features/jacobiCudaGraphs/jacobiCudaGraphs_vs2022.vcxproj
index e7b1ea43..2d37b087 100644
--- a/Samples/3_CUDA_Features/jacobiCudaGraphs/jacobiCudaGraphs_vs2022.vcxproj
+++ b/Samples/3_CUDA_Features/jacobiCudaGraphs/jacobiCudaGraphs_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/jacobiCudaGraphs.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/memMapIPCDrv/Makefile b/Samples/3_CUDA_Features/memMapIPCDrv/Makefile
index fd7d6ba7..ae7b17d1 100644
--- a/Samples/3_CUDA_Features/memMapIPCDrv/Makefile
+++ b/Samples/3_CUDA_Features/memMapIPCDrv/Makefile
@@ -274,24 +274,6 @@ ifeq ($(TARGET_OS),darwin)
   SAMPLE_ENABLED := 0
 endif
 
-# This sample is not supported on ARMv7
-ifeq ($(TARGET_ARCH),armv7l)
-  $(info >>> WARNING - memMapIPCDrv is not supported on ARMv7 - waiving sample <<<)
-  SAMPLE_ENABLED := 0
-endif
-
-# This sample is not supported on aarch64
-ifeq ($(TARGET_ARCH),aarch64)
-  $(info >>> WARNING - memMapIPCDrv is not supported on aarch64 - waiving sample <<<)
-  SAMPLE_ENABLED := 0
-endif
-
-# This sample is not supported on sbsa
-ifeq ($(TARGET_ARCH),sbsa)
-  $(info >>> WARNING - memMapIPCDrv is not supported on sbsa - waiving sample <<<)
-  SAMPLE_ENABLED := 0
-endif
-
 ALL_LDFLAGS :=
 ALL_LDFLAGS += $(ALL_CCFLAGS)
 ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
diff --git a/Samples/3_CUDA_Features/memMapIPCDrv/README.md b/Samples/3_CUDA_Features/memMapIPCDrv/README.md
index 435af2ed..bace5c4f 100644
--- a/Samples/3_CUDA_Features/memMapIPCDrv/README.md
+++ b/Samples/3_CUDA_Features/memMapIPCDrv/README.md
@@ -10,27 +10,27 @@ CUDA Driver API, cuMemMap IPC, MMAP
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
-Linux, Windows
+Linux, Windows, QNX
 
 ## Supported CPU Architecture
 
-x86_64, ppc64le
+x86_64, ppc64le, armv7l, aarch64
 
 ## CUDA APIs involved
 
 ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
-cuCtxSetCurrent, cuMemSetAccess, cuMemcpyDtoHAsync, cuStreamDestroy, cuInit, cuMemAddressReserve, cuCtxDestroy, cuModuleGetFunction, cuModuleLoad, cuStreamCreate, cuCtxCreate, cuMemExportToShareableHandle, cuMemAddressFree, cuMemGetAllocationGranularity, cuModuleLoadDataEx, cuDeviceGet, cuMemUnmap, cuDeviceGetAttribute, cuMemRelease, cuCtxEnablePeerAccess, cuMemMap, cuMemImportFromShareableHandle, cuMemCreate, cuStreamSynchronize, cuDeviceCanAccessPeer, cuDeviceGetCount, cuLaunchKernel, cuOccupancyMaxActiveBlocksPerMultiprocessor
+cuDeviceCanAccessPeer, cuMemImportFromShareableHandle, cuModuleLoadDataEx, cuModuleGetFunction, cuMemSetAccess, cuModuleLoad, cuStreamCreate, cuMemRelease, cuInit, cuLaunchKernel, cuMemcpyDtoHAsync, cuMemCreate, cuDeviceGet, cuCtxDestroy, cuDeviceGetCount, cuMemMap, cuMemExportToShareableHandle, cuStreamSynchronize, cuCtxEnablePeerAccess, cuDeviceGetAttribute, cuOccupancyMaxActiveBlocksPerMultiprocessor, cuCtxSetCurrent, cuMemGetAllocationGranularity, cuMemAddressFree, cuMemUnmap, cuCtxCreate, cuStreamDestroy, cuMemAddressReserve
 
 ## Dependencies needed to build/run
 [IPC](../../../README.md#ipc)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
@@ -52,9 +52,9 @@ $ cd <sample_dir>
 $ make
 ```
 The samples makefiles can take advantage of certain options:
-*  **TARGET_ARCH=<arch>** - cross-compile targeting a specific architecture. Allowed architectures are x86_64, ppc64le.
+*  **TARGET_ARCH=<arch>** - cross-compile targeting a specific architecture. Allowed architectures are x86_64, ppc64le, armv7l, aarch64.
     By default, TARGET_ARCH is set to HOST_ARCH. On a x86_64 machine, not setting TARGET_ARCH is the equivalent of setting TARGET_ARCH=x86_64.<br/>
-`$ make TARGET_ARCH=x86_64` <br/> `$ make TARGET_ARCH=ppc64le` <br/>
+`$ make TARGET_ARCH=x86_64` <br/> `$ make TARGET_ARCH=ppc64le` <br/> `$ make TARGET_ARCH=armv7l` <br/> `$ make TARGET_ARCH=aarch64` <br/>
     See [here](http://docs.nvidia.com/cuda/cuda-samples/index.html#cross-samples) for more details.
 *   **dbg=1** - build with debug symbols
     ```
diff --git a/Samples/3_CUDA_Features/memMapIPCDrv/memMapIPCDrv_vs2017.vcxproj b/Samples/3_CUDA_Features/memMapIPCDrv/memMapIPCDrv_vs2017.vcxproj
index 02b0d7ea..4d7d058a 100644
--- a/Samples/3_CUDA_Features/memMapIPCDrv/memMapIPCDrv_vs2017.vcxproj
+++ b/Samples/3_CUDA_Features/memMapIPCDrv/memMapIPCDrv_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -112,6 +112,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/memMapIPCDrv/memMapIPCDrv_vs2019.vcxproj b/Samples/3_CUDA_Features/memMapIPCDrv/memMapIPCDrv_vs2019.vcxproj
index 3abc66c6..287fbc92 100644
--- a/Samples/3_CUDA_Features/memMapIPCDrv/memMapIPCDrv_vs2019.vcxproj
+++ b/Samples/3_CUDA_Features/memMapIPCDrv/memMapIPCDrv_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/memMapIPCDrv/memMapIPCDrv_vs2022.vcxproj b/Samples/3_CUDA_Features/memMapIPCDrv/memMapIPCDrv_vs2022.vcxproj
index 8da6a6cd..d6bc39c6 100644
--- a/Samples/3_CUDA_Features/memMapIPCDrv/memMapIPCDrv_vs2022.vcxproj
+++ b/Samples/3_CUDA_Features/memMapIPCDrv/memMapIPCDrv_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/memMapIPCDrv/memMapIpc.cpp b/Samples/3_CUDA_Features/memMapIPCDrv/memMapIpc.cpp
index ba275b64..19d6aa60 100644
--- a/Samples/3_CUDA_Features/memMapIPCDrv/memMapIpc.cpp
+++ b/Samples/3_CUDA_Features/memMapIPCDrv/memMapIpc.cpp
@@ -595,10 +595,6 @@ static void parentProcess(char *app) {
 
 // Host code
 int main(int argc, char **argv) {
-#if defined(__arm__) || defined(__aarch64__)
-  printf("Not supported on ARM\n");
-  return EXIT_WAIVED;
-#else
   // Initialize
   checkCudaErrors(cuInit(0));
 
@@ -608,7 +604,6 @@ int main(int argc, char **argv) {
     childProcess(atoi(argv[1]), atoi(argv[2]), argv);
   }
   return EXIT_SUCCESS;
-#endif
 }
 
 bool inline findModulePath(const char *module_file, string &module_path,
@@ -643,4 +638,4 @@ bool inline findModulePath(const char *module_file, string &module_path,
 
     return true;
   }
-}
\ No newline at end of file
+}
diff --git a/Samples/3_CUDA_Features/newdelete/Makefile b/Samples/3_CUDA_Features/newdelete/Makefile
index 9e4ba3b6..48c352f3 100644
--- a/Samples/3_CUDA_Features/newdelete/Makefile
+++ b/Samples/3_CUDA_Features/newdelete/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/3_CUDA_Features/newdelete/NsightEclipse.xml b/Samples/3_CUDA_Features/newdelete/NsightEclipse.xml
index edcd5270..ae7639e4 100644
--- a/Samples/3_CUDA_Features/newdelete/NsightEclipse.xml
+++ b/Samples/3_CUDA_Features/newdelete/NsightEclipse.xml
@@ -3,11 +3,11 @@
 <entry>
   <name>newdelete</name>
   <cuda_api_list>
-    <toolkit>cudaDeviceSetLimit</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaFree</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaDeviceSetLimit</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates dynamic global memory allocation through device C++ new and delete operators and virtual function declarations available with CUDA 4.0.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -46,6 +46,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/3_CUDA_Features/newdelete/README.md b/Samples/3_CUDA_Features/newdelete/README.md
index a976004d..1df54ae2 100644
--- a/Samples/3_CUDA_Features/newdelete/README.md
+++ b/Samples/3_CUDA_Features/newdelete/README.md
@@ -10,7 +10,7 @@ Device Memory Allocation, C++ Templates
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaDeviceSetLimit, cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaMemcpy
+cudaMemcpy, cudaFree, cudaDeviceSynchronize, cudaDeviceSetLimit, cudaMalloc
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/3_CUDA_Features/newdelete/newdelete_vs2017.vcxproj b/Samples/3_CUDA_Features/newdelete/newdelete_vs2017.vcxproj
index 04f1a3b2..f5546f27 100644
--- a/Samples/3_CUDA_Features/newdelete/newdelete_vs2017.vcxproj
+++ b/Samples/3_CUDA_Features/newdelete/newdelete_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/newdelete.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/newdelete/newdelete_vs2019.vcxproj b/Samples/3_CUDA_Features/newdelete/newdelete_vs2019.vcxproj
index 75e28c88..4f6a09f4 100644
--- a/Samples/3_CUDA_Features/newdelete/newdelete_vs2019.vcxproj
+++ b/Samples/3_CUDA_Features/newdelete/newdelete_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/newdelete.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/newdelete/newdelete_vs2022.vcxproj b/Samples/3_CUDA_Features/newdelete/newdelete_vs2022.vcxproj
index 20c428e5..cec331d8 100644
--- a/Samples/3_CUDA_Features/newdelete/newdelete_vs2022.vcxproj
+++ b/Samples/3_CUDA_Features/newdelete/newdelete_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/newdelete.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/ptxjit/NsightEclipse.xml b/Samples/3_CUDA_Features/ptxjit/NsightEclipse.xml
index a5dc2e54..2beac119 100644
--- a/Samples/3_CUDA_Features/ptxjit/NsightEclipse.xml
+++ b/Samples/3_CUDA_Features/ptxjit/NsightEclipse.xml
@@ -3,18 +3,18 @@
 <entry>
   <name>ptxjit</name>
   <cuda_api_list>
+    <driver>cuLaunchKernel</driver>
+    <driver>cuModuleLoadData</driver>
+    <driver>cuLinkCreate</driver>
     <driver>cuModuleGetFunction</driver>
     <driver>cuLinkAddData</driver>
-    <driver>cuModuleLoadData</driver>
-    <driver>cuLaunchKernel</driver>
     <driver>cuModuleUnload</driver>
-    <driver>cuLinkComplete</driver>
-    <driver>cuLinkCreate</driver>
     <driver>cuLinkDestroy</driver>
-    <toolkit>cudaDriverGetVersion</toolkit>
-    <toolkit>cudaFree</toolkit>
+    <driver>cuLinkComplete</driver>
     <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaDriverGetVersion</toolkit>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaFree</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample uses the Driver API to just-in-time compile (JIT) a Kernel from PTX code. Additionally, this sample demonstrates the seamless interoperability capability of the CUDA Runtime and CUDA Driver API calls.  For CUDA 5.5, this sample shows how to use cuLink* functions to link PTX assembly using the CUDA driver at runtime.]]></description>
   <devicecompilation>separate</devicecompilation>
@@ -57,6 +57,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/3_CUDA_Features/ptxjit/README.md b/Samples/3_CUDA_Features/ptxjit/README.md
index 021b4b40..314c6284 100644
--- a/Samples/3_CUDA_Features/ptxjit/README.md
+++ b/Samples/3_CUDA_Features/ptxjit/README.md
@@ -10,7 +10,7 @@ CUDA Driver API
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
-cuModuleGetFunction, cuLinkAddData, cuModuleLoadData, cuLaunchKernel, cuModuleUnload, cuLinkComplete, cuLinkCreate, cuLinkDestroy
+cuLaunchKernel, cuModuleLoadData, cuLinkCreate, cuModuleGetFunction, cuLinkAddData, cuModuleUnload, cuLinkDestroy, cuLinkComplete
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaDriverGetVersion, cudaFree, cudaMalloc, cudaMemcpy
+cudaMalloc, cudaDriverGetVersion, cudaMemcpy, cudaFree
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/3_CUDA_Features/ptxjit/ptxjit_vs2017.vcxproj b/Samples/3_CUDA_Features/ptxjit/ptxjit_vs2017.vcxproj
index 6c7fa953..8544a38c 100644
--- a/Samples/3_CUDA_Features/ptxjit/ptxjit_vs2017.vcxproj
+++ b/Samples/3_CUDA_Features/ptxjit/ptxjit_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -111,6 +111,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/ptxjit/ptxjit_vs2019.vcxproj b/Samples/3_CUDA_Features/ptxjit/ptxjit_vs2019.vcxproj
index 9d3b8c11..d0c152c8 100644
--- a/Samples/3_CUDA_Features/ptxjit/ptxjit_vs2019.vcxproj
+++ b/Samples/3_CUDA_Features/ptxjit/ptxjit_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/ptxjit/ptxjit_vs2022.vcxproj b/Samples/3_CUDA_Features/ptxjit/ptxjit_vs2022.vcxproj
index f8eecb9f..c4dbf912 100644
--- a/Samples/3_CUDA_Features/ptxjit/ptxjit_vs2022.vcxproj
+++ b/Samples/3_CUDA_Features/ptxjit/ptxjit_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/simpleCudaGraphs/Makefile b/Samples/3_CUDA_Features/simpleCudaGraphs/Makefile
index a64fbbf6..d956e9b4 100644
--- a/Samples/3_CUDA_Features/simpleCudaGraphs/Makefile
+++ b/Samples/3_CUDA_Features/simpleCudaGraphs/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/3_CUDA_Features/simpleCudaGraphs/NsightEclipse.xml b/Samples/3_CUDA_Features/simpleCudaGraphs/NsightEclipse.xml
index 0b60949b..56db08fb 100644
--- a/Samples/3_CUDA_Features/simpleCudaGraphs/NsightEclipse.xml
+++ b/Samples/3_CUDA_Features/simpleCudaGraphs/NsightEclipse.xml
@@ -3,37 +3,37 @@
 <entry>
   <name>simpleCudaGraphs</name>
   <cuda_api_list>
-    <toolkit>cudaGraphAddMemsetNode</toolkit>
-    <toolkit>cudaGraphsUsingStreamCapture</toolkit>
-    <toolkit>cudaMemcpyAsync</toolkit>
-    <toolkit>cudaGraphGetNodes</toolkit>
-    <toolkit>cudaStreamDestroy</toolkit>
-    <toolkit>cudaMallocHost</toolkit>
     <toolkit>cudaGraphClone</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaPitchedPtr</toolkit>
-    <toolkit>cudaGraphCreate</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaPos</toolkit>
+    <toolkit>cudaExtent</toolkit>
+    <toolkit>cudaGraphLaunch</toolkit>
+    <toolkit>cudaStreamCreate</toolkit>
+    <toolkit>cudaLaunchHostFunc</toolkit>
     <toolkit>cudaGraphAddMemcpyNode</toolkit>
+    <toolkit>cudaMallocHost</toolkit>
+    <toolkit>cudaPitchedPtr</toolkit>
     <toolkit>cudaStreamEndCapture</toolkit>
-    <toolkit>cudaGraphExecDestroy</toolkit>
-    <toolkit>cudaStreamBeginCapture</toolkit>
+    <toolkit>cudaGraphCreate</toolkit>
+    <toolkit>cudaFreeHost</toolkit>
+    <toolkit>cudaGraphGetNodes</toolkit>
+    <toolkit>cudaMemsetAsync</toolkit>
+    <toolkit>cudaMemcpyAsync</toolkit>
     <toolkit>cudaGraphAddHostNode</toolkit>
+    <toolkit>cudaGraphInstantiate</toolkit>
+    <toolkit>cudaStreamDestroy</toolkit>
+    <toolkit>cudaStreamBeginCapture</toolkit>
+    <toolkit>cudaStreamWaitEvent</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaPos</toolkit>
+    <toolkit>cudaGraphAddKernelNode</toolkit>
+    <toolkit>cudaGraphDestroy</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
     <toolkit>cudaGraphsManual</toolkit>
     <toolkit>cudaStreamSynchronize</toolkit>
-    <toolkit>cudaGraphDestroy</toolkit>
-    <toolkit>cudaGraphLaunch</toolkit>
-    <toolkit>cudaStreamWaitEvent</toolkit>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
-    <toolkit>cudaStreamCreate</toolkit>
-    <toolkit>cudaGraphInstantiate</toolkit>
-    <toolkit>cudaLaunchHostFunc</toolkit>
-    <toolkit>cudaExtent</toolkit>
-    <toolkit>cudaMemsetAsync</toolkit>
-    <toolkit>cudaFreeHost</toolkit>
-    <toolkit>cudaGraphAddKernelNode</toolkit>
+    <toolkit>cudaGraphAddMemsetNode</toolkit>
+    <toolkit>cudaGraphsUsingStreamCapture</toolkit>
+    <toolkit>cudaGraphExecDestroy</toolkit>
   </cuda_api_list>
   <description><![CDATA[A demonstration of CUDA Graphs creation, instantiation and launch using Graphs APIs and Stream Capture APIs.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -73,6 +73,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/3_CUDA_Features/simpleCudaGraphs/README.md b/Samples/3_CUDA_Features/simpleCudaGraphs/README.md
index 9a2c9249..b421b2fb 100644
--- a/Samples/3_CUDA_Features/simpleCudaGraphs/README.md
+++ b/Samples/3_CUDA_Features/simpleCudaGraphs/README.md
@@ -10,7 +10,7 @@ CUDA Graphs, Stream Capture
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaGraphAddMemsetNode, cudaGraphsUsingStreamCapture, cudaMemcpyAsync, cudaGraphGetNodes, cudaStreamDestroy, cudaMallocHost, cudaGraphClone, cudaEventCreate, cudaPitchedPtr, cudaGraphCreate, cudaMalloc, cudaPos, cudaGraphAddMemcpyNode, cudaStreamEndCapture, cudaGraphExecDestroy, cudaStreamBeginCapture, cudaGraphAddHostNode, cudaGraphsManual, cudaStreamSynchronize, cudaGraphDestroy, cudaGraphLaunch, cudaStreamWaitEvent, cudaFree, cudaEventRecord, cudaStreamCreate, cudaGraphInstantiate, cudaLaunchHostFunc, cudaExtent, cudaMemsetAsync, cudaFreeHost, cudaGraphAddKernelNode
+cudaGraphClone, cudaExtent, cudaGraphLaunch, cudaStreamCreate, cudaLaunchHostFunc, cudaGraphAddMemcpyNode, cudaMallocHost, cudaPitchedPtr, cudaStreamEndCapture, cudaGraphCreate, cudaFreeHost, cudaGraphGetNodes, cudaMemsetAsync, cudaMemcpyAsync, cudaGraphAddHostNode, cudaGraphInstantiate, cudaStreamDestroy, cudaStreamBeginCapture, cudaStreamWaitEvent, cudaEventCreate, cudaMalloc, cudaFree, cudaPos, cudaGraphAddKernelNode, cudaGraphDestroy, cudaEventRecord, cudaGraphsManual, cudaStreamSynchronize, cudaGraphAddMemsetNode, cudaGraphsUsingStreamCapture, cudaGraphExecDestroy
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/3_CUDA_Features/simpleCudaGraphs/simpleCudaGraphs_vs2017.vcxproj b/Samples/3_CUDA_Features/simpleCudaGraphs/simpleCudaGraphs_vs2017.vcxproj
index d9e1f37e..a9525b01 100644
--- a/Samples/3_CUDA_Features/simpleCudaGraphs/simpleCudaGraphs_vs2017.vcxproj
+++ b/Samples/3_CUDA_Features/simpleCudaGraphs/simpleCudaGraphs_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleCudaGraphs.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/simpleCudaGraphs/simpleCudaGraphs_vs2019.vcxproj b/Samples/3_CUDA_Features/simpleCudaGraphs/simpleCudaGraphs_vs2019.vcxproj
index 7d3dfb84..168b88ab 100644
--- a/Samples/3_CUDA_Features/simpleCudaGraphs/simpleCudaGraphs_vs2019.vcxproj
+++ b/Samples/3_CUDA_Features/simpleCudaGraphs/simpleCudaGraphs_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleCudaGraphs.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/simpleCudaGraphs/simpleCudaGraphs_vs2022.vcxproj b/Samples/3_CUDA_Features/simpleCudaGraphs/simpleCudaGraphs_vs2022.vcxproj
index b41246b0..58840c80 100644
--- a/Samples/3_CUDA_Features/simpleCudaGraphs/simpleCudaGraphs_vs2022.vcxproj
+++ b/Samples/3_CUDA_Features/simpleCudaGraphs/simpleCudaGraphs_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleCudaGraphs.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/tf32TensorCoreGemm/Makefile b/Samples/3_CUDA_Features/tf32TensorCoreGemm/Makefile
index eee8e843..a5c51424 100644
--- a/Samples/3_CUDA_Features/tf32TensorCoreGemm/Makefile
+++ b/Samples/3_CUDA_Features/tf32TensorCoreGemm/Makefile
@@ -322,9 +322,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 80 86 87
+SMS ?= 80 86 87 90
 else
-SMS ?= 80 86
+SMS ?= 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/3_CUDA_Features/tf32TensorCoreGemm/NsightEclipse.xml b/Samples/3_CUDA_Features/tf32TensorCoreGemm/NsightEclipse.xml
index 8213c761..f21f3895 100644
--- a/Samples/3_CUDA_Features/tf32TensorCoreGemm/NsightEclipse.xml
+++ b/Samples/3_CUDA_Features/tf32TensorCoreGemm/NsightEclipse.xml
@@ -7,18 +7,18 @@
     <flag>--maxrregcount=128</flag>
   </cflags>
   <cuda_api_list>
-    <toolkit>cudaMemset</toolkit>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaFuncSetAttribute</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
-    <toolkit>cudaEventSynchronize</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaGetLastError</toolkit>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaFree</toolkit>
     <toolkit>cudaGetErrorString</toolkit>
+    <toolkit>cudaGetLastError</toolkit>
+    <toolkit>cudaEventSynchronize</toolkit>
+    <toolkit>cudaFuncSetAttribute</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
+    <toolkit>cudaMemset</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[A CUDA sample demonstrating tf32 (e8m10) GEMM computation using the Warp Matrix Multiply and Accumulate (WMMA) API introduced with CUDA 11 in Ampere chip family tensor cores for faster matrix operations. This sample also uses async copy provided by cuda pipeline interface for gmem to shmem async loads which improves kernel performance and reduces register presssure.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -53,6 +53,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/3_CUDA_Features/tf32TensorCoreGemm/README.md b/Samples/3_CUDA_Features/tf32TensorCoreGemm/README.md
index 343eb6de..d7f41f68 100644
--- a/Samples/3_CUDA_Features/tf32TensorCoreGemm/README.md
+++ b/Samples/3_CUDA_Features/tf32TensorCoreGemm/README.md
@@ -10,7 +10,7 @@ Matrix Multiply, WMMA, Tensor Cores
 
 ## Supported SM Architectures
 
-[SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemset, cudaFree, cudaEventRecord, cudaEventCreate, cudaFuncSetAttribute, cudaEventElapsedTime, cudaEventSynchronize, cudaMalloc, cudaGetLastError, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties
+cudaMemcpy, cudaFree, cudaGetErrorString, cudaGetLastError, cudaEventSynchronize, cudaFuncSetAttribute, cudaEventRecord, cudaMemset, cudaMalloc, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate
 
 ## Dependencies needed to build/run
 [CPP11](../../../README.md#cpp11)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/3_CUDA_Features/tf32TensorCoreGemm/tf32TensorCoreGemm_vs2017.vcxproj b/Samples/3_CUDA_Features/tf32TensorCoreGemm/tf32TensorCoreGemm_vs2017.vcxproj
index 6b002d88..2948b39d 100644
--- a/Samples/3_CUDA_Features/tf32TensorCoreGemm/tf32TensorCoreGemm_vs2017.vcxproj
+++ b/Samples/3_CUDA_Features/tf32TensorCoreGemm/tf32TensorCoreGemm_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/tf32TensorCoreGemm.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/tf32TensorCoreGemm/tf32TensorCoreGemm_vs2019.vcxproj b/Samples/3_CUDA_Features/tf32TensorCoreGemm/tf32TensorCoreGemm_vs2019.vcxproj
index 4166d39e..7a06218b 100644
--- a/Samples/3_CUDA_Features/tf32TensorCoreGemm/tf32TensorCoreGemm_vs2019.vcxproj
+++ b/Samples/3_CUDA_Features/tf32TensorCoreGemm/tf32TensorCoreGemm_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/tf32TensorCoreGemm.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/tf32TensorCoreGemm/tf32TensorCoreGemm_vs2022.vcxproj b/Samples/3_CUDA_Features/tf32TensorCoreGemm/tf32TensorCoreGemm_vs2022.vcxproj
index c4534b65..f7b1d1a0 100644
--- a/Samples/3_CUDA_Features/tf32TensorCoreGemm/tf32TensorCoreGemm_vs2022.vcxproj
+++ b/Samples/3_CUDA_Features/tf32TensorCoreGemm/tf32TensorCoreGemm_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/tf32TensorCoreGemm.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/Makefile b/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/Makefile
index e743176e..aa25f151 100644
--- a/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/Makefile
+++ b/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/Makefile
@@ -304,9 +304,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/NsightEclipse.xml b/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/NsightEclipse.xml
index 7f5952b9..f0457b12 100644
--- a/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/NsightEclipse.xml
+++ b/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/NsightEclipse.xml
@@ -6,11 +6,11 @@
     <flag>--std=c++11</flag>
   </cflags>
   <cuda_api_list>
-    <toolkit>cudaMemset</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaFree</toolkit>
     <toolkit>cudaDeviceGetAttribute</toolkit>
+    <toolkit>cudaMemset</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates how using Cooperative Groups (CG) to perform warp aggregated atomics to single and multiple counters, a useful technique to improve performance when many threads atomically add to a single or multiple counters.]]></description>
   <includepaths>
@@ -50,6 +50,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/README.md b/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/README.md
index 69c27bdd..f40b05ef 100644
--- a/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/README.md
+++ b/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/README.md
@@ -10,7 +10,7 @@ Cooperative Groups, Atomic Intrinsics
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemset, cudaFree, cudaDeviceGetAttribute, cudaMalloc, cudaMemcpy
+cudaMemcpy, cudaFree, cudaDeviceGetAttribute, cudaMemset, cudaMalloc
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/warpAggregatedAtomicsCG_vs2017.vcxproj b/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/warpAggregatedAtomicsCG_vs2017.vcxproj
index d7339145..0e2d5973 100644
--- a/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/warpAggregatedAtomicsCG_vs2017.vcxproj
+++ b/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/warpAggregatedAtomicsCG_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/warpAggregatedAtomicsCG.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/warpAggregatedAtomicsCG_vs2019.vcxproj b/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/warpAggregatedAtomicsCG_vs2019.vcxproj
index 36bd9bca..a15057bb 100644
--- a/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/warpAggregatedAtomicsCG_vs2019.vcxproj
+++ b/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/warpAggregatedAtomicsCG_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/warpAggregatedAtomicsCG.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/warpAggregatedAtomicsCG_vs2022.vcxproj b/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/warpAggregatedAtomicsCG_vs2022.vcxproj
index ff0902b4..8567bb36 100644
--- a/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/warpAggregatedAtomicsCG_vs2022.vcxproj
+++ b/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/warpAggregatedAtomicsCG_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/warpAggregatedAtomicsCG.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/FilterBorderControlNPP_vs2017.vcxproj b/Samples/4_CUDA_Libraries/FilterBorderControlNPP/FilterBorderControlNPP_vs2017.vcxproj
index 695d8b1a..61a190ad 100644
--- a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/FilterBorderControlNPP_vs2017.vcxproj
+++ b/Samples/4_CUDA_Libraries/FilterBorderControlNPP/FilterBorderControlNPP_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -117,6 +117,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/FilterBorderControlNPP_vs2019.vcxproj b/Samples/4_CUDA_Libraries/FilterBorderControlNPP/FilterBorderControlNPP_vs2019.vcxproj
index 702ecd0a..438dd31e 100644
--- a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/FilterBorderControlNPP_vs2019.vcxproj
+++ b/Samples/4_CUDA_Libraries/FilterBorderControlNPP/FilterBorderControlNPP_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -113,6 +113,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/FilterBorderControlNPP_vs2022.vcxproj b/Samples/4_CUDA_Libraries/FilterBorderControlNPP/FilterBorderControlNPP_vs2022.vcxproj
index 0e5f2929..86780849 100644
--- a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/FilterBorderControlNPP_vs2022.vcxproj
+++ b/Samples/4_CUDA_Libraries/FilterBorderControlNPP/FilterBorderControlNPP_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -113,6 +113,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/NsightEclipse.xml b/Samples/4_CUDA_Libraries/FilterBorderControlNPP/NsightEclipse.xml
index cc161f14..60bd7e9d 100644
--- a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/FilterBorderControlNPP/NsightEclipse.xml
@@ -3,12 +3,12 @@
 <entry>
   <name>FilterBorderControlNPP</name>
   <cuda_api_list>
-    <toolkit>cudaDeviceReset</toolkit>
-    <toolkit>cudaGetDeviceCount</toolkit>
-    <toolkit>cudaDriverGetVersion</toolkit>
-    <toolkit>cudaDeviceInit</toolkit>
-    <toolkit>cudaSetDevice</toolkit>
     <toolkit>cudaRuntimeGetVersion</toolkit>
+    <toolkit>cudaDeviceReset</toolkit>
+    <toolkit>cudaSetDevice</toolkit>
+    <toolkit>cudaGetDeviceCount</toolkit>
+    <toolkit>cudaDeviceInit</toolkit>
+    <toolkit>cudaDriverGetVersion</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates how any border version of an NPP filtering function can be used in the most common mode, with border control enabled. Mentioned functions can be used to duplicate the results of the equivalent non-border version of the NPP functions. They can be also used for enabling and disabling border control on various source image edges depending on what portion of the source image is being used as input.]]></description>
@@ -68,6 +68,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/README.md b/Samples/4_CUDA_Libraries/FilterBorderControlNPP/README.md
index 23f2bfb8..54a652ad 100644
--- a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/README.md
+++ b/Samples/4_CUDA_Libraries/FilterBorderControlNPP/README.md
@@ -10,7 +10,7 @@ Performance Strategies, Image Processing, NPP Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaDeviceReset, cudaGetDeviceCount, cudaDriverGetVersion, cudaDeviceInit, cudaSetDevice, cudaRuntimeGetVersion, cudaGetDeviceProperties
+cudaRuntimeGetVersion, cudaDeviceReset, cudaSetDevice, cudaGetDeviceCount, cudaDeviceInit, cudaDriverGetVersion, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [FreeImage](../../../README.md#freeimage), [NPP](../../../README.md#npp)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/MersenneTwisterGP11213_vs2017.vcxproj b/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/MersenneTwisterGP11213_vs2017.vcxproj
index dd819226..17822bcb 100644
--- a/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/MersenneTwisterGP11213_vs2017.vcxproj
+++ b/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/MersenneTwisterGP11213_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/MersenneTwisterGP11213_vs2019.vcxproj b/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/MersenneTwisterGP11213_vs2019.vcxproj
index d2ac6df2..75322608 100644
--- a/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/MersenneTwisterGP11213_vs2019.vcxproj
+++ b/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/MersenneTwisterGP11213_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/MersenneTwisterGP11213_vs2022.vcxproj b/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/MersenneTwisterGP11213_vs2022.vcxproj
index 1de5e019..a40090f1 100644
--- a/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/MersenneTwisterGP11213_vs2022.vcxproj
+++ b/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/MersenneTwisterGP11213_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/NsightEclipse.xml b/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/NsightEclipse.xml
index 40c95ec0..34ed799b 100644
--- a/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/NsightEclipse.xml
@@ -3,13 +3,13 @@
 <entry>
   <name>MersenneTwisterGP11213</name>
   <cuda_api_list>
+    <toolkit>cudaStreamCreateWithFlags</toolkit>
     <toolkit>cudaStreamDestroy</toolkit>
     <toolkit>cudaFree</toolkit>
     <toolkit>cudaMallocHost</toolkit>
     <toolkit>cudaFreeHost</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaStreamCreateWithFlags</toolkit>
     <toolkit>cudaStreamSynchronize</toolkit>
+    <toolkit>cudaMalloc</toolkit>
     <toolkit>cudaMemcpyAsync</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates the Mersenne Twister random number generator GP11213 in cuRAND.]]></description>
@@ -58,6 +58,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/README.md b/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/README.md
index 9af50ae2..6244164c 100644
--- a/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/README.md
+++ b/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/README.md
@@ -10,7 +10,7 @@ CURAND Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaStreamDestroy, cudaFree, cudaMallocHost, cudaFreeHost, cudaMalloc, cudaStreamCreateWithFlags, cudaStreamSynchronize, cudaMemcpyAsync
+cudaStreamCreateWithFlags, cudaStreamDestroy, cudaFree, cudaMallocHost, cudaFreeHost, cudaStreamSynchronize, cudaMalloc, cudaMemcpyAsync
 
 ## Dependencies needed to build/run
 [CURAND](../../../README.md#curand)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/batchCUBLAS/NsightEclipse.xml b/Samples/4_CUDA_Libraries/batchCUBLAS/NsightEclipse.xml
index e166cad3..44f306e2 100644
--- a/Samples/4_CUDA_Libraries/batchCUBLAS/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/batchCUBLAS/NsightEclipse.xml
@@ -5,15 +5,15 @@
   <cuda_api_list>
     <driver>cuRand</driver>
     <driver>cuEqual</driver>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaStreamCreate</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaGetLastError</toolkit>
     <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaGetErrorString</toolkit>
-    <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaGetLastError</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaGetDevice</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaStreamCreate</toolkit>
+    <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[A CUDA Sample that demonstrates how using batched CUBLAS API calls to improve overall performance.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -58,6 +58,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/4_CUDA_Libraries/batchCUBLAS/README.md b/Samples/4_CUDA_Libraries/batchCUBLAS/README.md
index ade22850..63575022 100644
--- a/Samples/4_CUDA_Libraries/batchCUBLAS/README.md
+++ b/Samples/4_CUDA_Libraries/batchCUBLAS/README.md
@@ -10,7 +10,7 @@ Linear Algebra, CUBLAS Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -26,14 +26,14 @@ x86_64, ppc64le, armv7l
 cuRand, cuEqual
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaStreamCreate, cudaDeviceSynchronize, cudaMalloc, cudaGetLastError, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties, cudaGetDevice
+cudaMemcpy, cudaGetErrorString, cudaFree, cudaGetLastError, cudaDeviceSynchronize, cudaGetDevice, cudaMalloc, cudaStreamCreate, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [CUBLAS](../../../README.md#cublas)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/batchCUBLAS/batchCUBLAS_vs2017.vcxproj b/Samples/4_CUDA_Libraries/batchCUBLAS/batchCUBLAS_vs2017.vcxproj
index 8df8d495..2c41bea3 100644
--- a/Samples/4_CUDA_Libraries/batchCUBLAS/batchCUBLAS_vs2017.vcxproj
+++ b/Samples/4_CUDA_Libraries/batchCUBLAS/batchCUBLAS_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/batchCUBLAS/batchCUBLAS_vs2019.vcxproj b/Samples/4_CUDA_Libraries/batchCUBLAS/batchCUBLAS_vs2019.vcxproj
index 3f42f964..1bd17bac 100644
--- a/Samples/4_CUDA_Libraries/batchCUBLAS/batchCUBLAS_vs2019.vcxproj
+++ b/Samples/4_CUDA_Libraries/batchCUBLAS/batchCUBLAS_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/batchCUBLAS/batchCUBLAS_vs2022.vcxproj b/Samples/4_CUDA_Libraries/batchCUBLAS/batchCUBLAS_vs2022.vcxproj
index 1823a30b..dba07dfd 100644
--- a/Samples/4_CUDA_Libraries/batchCUBLAS/batchCUBLAS_vs2022.vcxproj
+++ b/Samples/4_CUDA_Libraries/batchCUBLAS/batchCUBLAS_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/NsightEclipse.xml b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/NsightEclipse.xml
index 821f1eea..9cd72dd8 100644
--- a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/NsightEclipse.xml
@@ -3,19 +3,19 @@
 <entry>
   <name>batchedLabelMarkersAndLabelCompressionNPP</name>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaMallocHost</toolkit>
-    <toolkit>cudaMallocPitch</toolkit>
-    <toolkit>cudaDeviceGetAttribute</toolkit>
-    <toolkit>cudaFreeHost</toolkit>
-    <toolkit>cudaDriverGetVersion</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaStreamGetFlags</toolkit>
     <toolkit>cudaRuntimeGetVersion</toolkit>
+    <toolkit>cudaMallocPitch</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaDeviceGetAttribute</toolkit>
+    <toolkit>cudaMallocHost</toolkit>
+    <toolkit>cudaDriverGetVersion</toolkit>
+    <toolkit>cudaFreeHost</toolkit>
+    <toolkit>cudaGetDevice</toolkit>
+    <toolkit>cudaStreamGetFlags</toolkit>
     <toolkit>cudaStreamSynchronize</toolkit>
+    <toolkit>cudaMalloc</toolkit>
     <toolkit>cudaMemcpyAsync</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
-    <toolkit>cudaGetDevice</toolkit>
   </cuda_api_list>
   <description><![CDATA[An NPP CUDA Sample that demonstrates how to use the NPP label markers generation and label compression functions based on a Union Find (UF) algorithm including both single image and batched image versions.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -68,6 +68,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/README.md b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/README.md
index 97dbcd1d..ddc106f3 100644
--- a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/README.md
+++ b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/README.md
@@ -10,7 +10,7 @@ Performance Strategies, Image Processing, NPP Library, Using NPP Batch Functions
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaMallocHost, cudaMallocPitch, cudaDeviceGetAttribute, cudaFreeHost, cudaDriverGetVersion, cudaMalloc, cudaStreamGetFlags, cudaRuntimeGetVersion, cudaStreamSynchronize, cudaMemcpyAsync, cudaGetDeviceProperties, cudaGetDevice
+cudaRuntimeGetVersion, cudaMallocPitch, cudaFree, cudaDeviceGetAttribute, cudaMallocHost, cudaDriverGetVersion, cudaFreeHost, cudaGetDevice, cudaStreamGetFlags, cudaStreamSynchronize, cudaMalloc, cudaMemcpyAsync, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [NPP](../../../README.md#npp)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/batchedLabelMarkersAndLabelCompressionNPP_vs2017.vcxproj b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/batchedLabelMarkersAndLabelCompressionNPP_vs2017.vcxproj
index 59e207a8..da25b507 100644
--- a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/batchedLabelMarkersAndLabelCompressionNPP_vs2017.vcxproj
+++ b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/batchedLabelMarkersAndLabelCompressionNPP_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/batchedLabelMarkersAndLabelCompressionNPP_vs2019.vcxproj b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/batchedLabelMarkersAndLabelCompressionNPP_vs2019.vcxproj
index 148be7cb..928dc419 100644
--- a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/batchedLabelMarkersAndLabelCompressionNPP_vs2019.vcxproj
+++ b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/batchedLabelMarkersAndLabelCompressionNPP_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/batchedLabelMarkersAndLabelCompressionNPP_vs2022.vcxproj b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/batchedLabelMarkersAndLabelCompressionNPP_vs2022.vcxproj
index 1f5a0ff4..5b9408aa 100644
--- a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/batchedLabelMarkersAndLabelCompressionNPP_vs2022.vcxproj
+++ b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/batchedLabelMarkersAndLabelCompressionNPP_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/boxFilterNPP/NsightEclipse.xml b/Samples/4_CUDA_Libraries/boxFilterNPP/NsightEclipse.xml
index ae3b9b46..da9c0d55 100644
--- a/Samples/4_CUDA_Libraries/boxFilterNPP/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/boxFilterNPP/NsightEclipse.xml
@@ -3,8 +3,8 @@
 <entry>
   <name>boxFilterNPP</name>
   <cuda_api_list>
-    <toolkit>cudaDriverGetVersion</toolkit>
     <toolkit>cudaRuntimeGetVersion</toolkit>
+    <toolkit>cudaDriverGetVersion</toolkit>
   </cuda_api_list>
   <description><![CDATA[A NPP CUDA Sample that demonstrates how to use NPP FilterBox function to perform a Box Filter.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -64,6 +64,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/4_CUDA_Libraries/boxFilterNPP/README.md b/Samples/4_CUDA_Libraries/boxFilterNPP/README.md
index 3de69529..f647397c 100644
--- a/Samples/4_CUDA_Libraries/boxFilterNPP/README.md
+++ b/Samples/4_CUDA_Libraries/boxFilterNPP/README.md
@@ -10,7 +10,7 @@ Performance Strategies, Image Processing, NPP Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaDriverGetVersion, cudaRuntimeGetVersion
+cudaRuntimeGetVersion, cudaDriverGetVersion
 
 ## Dependencies needed to build/run
 [FreeImage](../../../README.md#freeimage), [NPP](../../../README.md#npp)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/boxFilterNPP/boxFilterNPP_vs2017.vcxproj b/Samples/4_CUDA_Libraries/boxFilterNPP/boxFilterNPP_vs2017.vcxproj
index 693cda83..bf2a30ab 100644
--- a/Samples/4_CUDA_Libraries/boxFilterNPP/boxFilterNPP_vs2017.vcxproj
+++ b/Samples/4_CUDA_Libraries/boxFilterNPP/boxFilterNPP_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -117,6 +117,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/boxFilterNPP/boxFilterNPP_vs2019.vcxproj b/Samples/4_CUDA_Libraries/boxFilterNPP/boxFilterNPP_vs2019.vcxproj
index 0f7a3fa0..a7ab43bb 100644
--- a/Samples/4_CUDA_Libraries/boxFilterNPP/boxFilterNPP_vs2019.vcxproj
+++ b/Samples/4_CUDA_Libraries/boxFilterNPP/boxFilterNPP_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -113,6 +113,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/boxFilterNPP/boxFilterNPP_vs2022.vcxproj b/Samples/4_CUDA_Libraries/boxFilterNPP/boxFilterNPP_vs2022.vcxproj
index 1ef674c0..5b3e3fb3 100644
--- a/Samples/4_CUDA_Libraries/boxFilterNPP/boxFilterNPP_vs2022.vcxproj
+++ b/Samples/4_CUDA_Libraries/boxFilterNPP/boxFilterNPP_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -113,6 +113,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/NsightEclipse.xml b/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/NsightEclipse.xml
index d4dca5ba..26b3f456 100644
--- a/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/NsightEclipse.xml
@@ -3,13 +3,13 @@
 <entry>
   <name>cannyEdgeDetectorNPP</name>
   <cuda_api_list>
+    <toolkit>cudaRuntimeGetVersion</toolkit>
     <toolkit>cudaFree</toolkit>
+    <toolkit>cudaSetDevice</toolkit>
     <toolkit>cudaGetDeviceCount</toolkit>
+    <toolkit>cudaDeviceInit</toolkit>
     <toolkit>cudaDriverGetVersion</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaDeviceInit</toolkit>
-    <toolkit>cudaSetDevice</toolkit>
-    <toolkit>cudaRuntimeGetVersion</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[An NPP CUDA Sample that demonstrates the recommended parameters to use with the nppiFilterCannyBorder_8u_C1R Canny Edge Detection image filter function. This function expects a single channel 8-bit grayscale input image. You can generate a grayscale image from a color image by first calling nppiColorToGray() or nppiRGBToGray(). The Canny Edge Detection function combines and improves on the techniques required to produce an edge detection image using multiple steps.]]></description>
@@ -66,6 +66,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/README.md b/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/README.md
index 41630e5d..01493d71 100644
--- a/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/README.md
+++ b/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/README.md
@@ -10,7 +10,7 @@ Performance Strategies, Image Processing, NPP Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaGetDeviceCount, cudaDriverGetVersion, cudaMalloc, cudaDeviceInit, cudaSetDevice, cudaRuntimeGetVersion, cudaGetDeviceProperties
+cudaRuntimeGetVersion, cudaFree, cudaSetDevice, cudaGetDeviceCount, cudaDeviceInit, cudaDriverGetVersion, cudaMalloc, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [FreeImage](../../../README.md#freeimage), [NPP](../../../README.md#npp)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/cannyEdgeDetectorNPP_vs2017.vcxproj b/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/cannyEdgeDetectorNPP_vs2017.vcxproj
index a2fe1e4c..338a498d 100644
--- a/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/cannyEdgeDetectorNPP_vs2017.vcxproj
+++ b/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/cannyEdgeDetectorNPP_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -117,6 +117,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/cannyEdgeDetectorNPP_vs2019.vcxproj b/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/cannyEdgeDetectorNPP_vs2019.vcxproj
index 0510328c..91164159 100644
--- a/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/cannyEdgeDetectorNPP_vs2019.vcxproj
+++ b/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/cannyEdgeDetectorNPP_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -113,6 +113,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/cannyEdgeDetectorNPP_vs2022.vcxproj b/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/cannyEdgeDetectorNPP_vs2022.vcxproj
index 29e20532..31270caa 100644
--- a/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/cannyEdgeDetectorNPP_vs2022.vcxproj
+++ b/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/cannyEdgeDetectorNPP_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -113,6 +113,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/conjugateGradient/NsightEclipse.xml b/Samples/4_CUDA_Libraries/conjugateGradient/NsightEclipse.xml
index 7fb14579..99a85b35 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradient/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/conjugateGradient/NsightEclipse.xml
@@ -3,10 +3,10 @@
 <entry>
   <name>conjugateGradient</name>
   <cuda_api_list>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaFree</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample implements a conjugate gradient solver on GPU using CUBLAS and CUSPARSE library.]]></description>
@@ -59,6 +59,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/4_CUDA_Libraries/conjugateGradient/README.md b/Samples/4_CUDA_Libraries/conjugateGradient/README.md
index c8521987..9f664782 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradient/README.md
+++ b/Samples/4_CUDA_Libraries/conjugateGradient/README.md
@@ -10,7 +10,7 @@ Linear Algebra, CUBLAS Library, CUSPARSE Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaMemcpy, cudaGetDeviceProperties
+cudaMemcpy, cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [CUBLAS](../../../README.md#cublas), [CUSPARSE](../../../README.md#cusparse)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/conjugateGradient/conjugateGradient_vs2017.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradient/conjugateGradient_vs2017.vcxproj
index 3f9f1fc8..42096e5a 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradient/conjugateGradient_vs2017.vcxproj
+++ b/Samples/4_CUDA_Libraries/conjugateGradient/conjugateGradient_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/conjugateGradient/conjugateGradient_vs2019.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradient/conjugateGradient_vs2019.vcxproj
index 9755b7c0..44cdb92c 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradient/conjugateGradient_vs2019.vcxproj
+++ b/Samples/4_CUDA_Libraries/conjugateGradient/conjugateGradient_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/conjugateGradient/conjugateGradient_vs2022.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradient/conjugateGradient_vs2022.vcxproj
index 2d9bb918..afb56e1d 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradient/conjugateGradient_vs2022.vcxproj
+++ b/Samples/4_CUDA_Libraries/conjugateGradient/conjugateGradient_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/Makefile b/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/Makefile
index af3ffc45..4c4c95d5 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/Makefile
+++ b/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/NsightEclipse.xml b/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/NsightEclipse.xml
index 3a4d5e98..0a766802 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/NsightEclipse.xml
@@ -3,23 +3,23 @@
 <entry>
   <name>conjugateGradientCudaGraphs</name>
   <cuda_api_list>
-    <toolkit>cudaStreamDestroy</toolkit>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaGraphExecDestroy</toolkit>
-    <toolkit>cudaMallocHost</toolkit>
-    <toolkit>cudaStreamCreate</toolkit>
     <toolkit>cudaGraphInstantiate</toolkit>
-    <toolkit>cudaOccupancyMaxPotentialBlockSize</toolkit>
+    <toolkit>cudaStreamDestroy</toolkit>
     <toolkit>cudaStreamBeginCapture</toolkit>
-    <toolkit>cudaMemsetAsync</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaMallocHost</toolkit>
     <toolkit>cudaStreamEndCapture</toolkit>
-    <toolkit>cudaFreeHost</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaStreamSynchronize</toolkit>
-    <toolkit>cudaMemcpyAsync</toolkit>
     <toolkit>cudaGraphDestroy</toolkit>
-    <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaFreeHost</toolkit>
     <toolkit>cudaGraphLaunch</toolkit>
+    <toolkit>cudaStreamCreate</toolkit>
+    <toolkit>cudaStreamSynchronize</toolkit>
+    <toolkit>cudaOccupancyMaxPotentialBlockSize</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaMemcpyAsync</toolkit>
+    <toolkit>cudaMemsetAsync</toolkit>
+    <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaGraphExecDestroy</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample implements a conjugate gradient solver on GPU using CUBLAS and CUSPARSE library calls captured and called using CUDA Graph APIs.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -71,6 +71,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/README.md b/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/README.md
index 8d634cde..787c89a4 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/README.md
+++ b/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/README.md
@@ -10,7 +10,7 @@ Linear Algebra, CUBLAS Library, CUSPARSE Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaStreamDestroy, cudaFree, cudaGraphExecDestroy, cudaMallocHost, cudaStreamCreate, cudaGraphInstantiate, cudaOccupancyMaxPotentialBlockSize, cudaStreamBeginCapture, cudaMemsetAsync, cudaStreamEndCapture, cudaFreeHost, cudaMalloc, cudaStreamSynchronize, cudaMemcpyAsync, cudaGraphDestroy, cudaGetDeviceProperties, cudaGraphLaunch
+cudaGraphInstantiate, cudaStreamDestroy, cudaStreamBeginCapture, cudaFree, cudaMallocHost, cudaStreamEndCapture, cudaGraphDestroy, cudaFreeHost, cudaGraphLaunch, cudaStreamCreate, cudaStreamSynchronize, cudaOccupancyMaxPotentialBlockSize, cudaMalloc, cudaMemcpyAsync, cudaMemsetAsync, cudaGetDeviceProperties, cudaGraphExecDestroy
 
 ## Dependencies needed to build/run
 [CUBLAS](../../../README.md#cublas), [CUSPARSE](../../../README.md#cusparse)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/conjugateGradientCudaGraphs_vs2017.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/conjugateGradientCudaGraphs_vs2017.vcxproj
index 8bd52098..2a1e00ec 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/conjugateGradientCudaGraphs_vs2017.vcxproj
+++ b/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/conjugateGradientCudaGraphs_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/conjugateGradientCudaGraphs.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/conjugateGradientCudaGraphs_vs2019.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/conjugateGradientCudaGraphs_vs2019.vcxproj
index 1838fc79..c1e36760 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/conjugateGradientCudaGraphs_vs2019.vcxproj
+++ b/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/conjugateGradientCudaGraphs_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/conjugateGradientCudaGraphs.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/conjugateGradientCudaGraphs_vs2022.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/conjugateGradientCudaGraphs_vs2022.vcxproj
index b07dfc7b..f641b210 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/conjugateGradientCudaGraphs_vs2022.vcxproj
+++ b/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/conjugateGradientCudaGraphs_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/conjugateGradientCudaGraphs.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/Makefile b/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/Makefile
index 2cd57b05..6e5d3435 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/Makefile
+++ b/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/Makefile
@@ -322,9 +322,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 61 70 72 75 80 86 87
+SMS ?= 61 70 72 75 80 86 87 90
 else
-SMS ?= 60 61 70 75 80 86
+SMS ?= 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/NsightEclipse.xml b/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/NsightEclipse.xml
index 55c72749..0deaa733 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/NsightEclipse.xml
@@ -7,15 +7,15 @@
   </cflags>
   <cuda_api_list>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
     <toolkit>cudaMallocManaged</toolkit>
-    <toolkit>cudaOccupancyMaxActiveBlocksPerMultiprocessor</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaEventDestroy</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
     <toolkit>cudaLaunchCooperativeKernel</toolkit>
+    <toolkit>cudaEventDestroy</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
+    <toolkit>cudaOccupancyMaxActiveBlocksPerMultiprocessor</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample implements a conjugate gradient solver on GPU using Multi Block Cooperative Groups, also uses Unified Memory.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -60,6 +60,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/README.md b/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/README.md
index 804955b6..e8c0643c 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/README.md
+++ b/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/README.md
@@ -10,7 +10,7 @@ Unified Memory, Linear Algebra, Cooperative Groups, MultiBlock Cooperative Group
 
 ## Supported SM Architectures
 
-[SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaEventRecord, cudaMallocManaged, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaEventCreate, cudaEventElapsedTime, cudaDeviceSynchronize, cudaEventDestroy, cudaLaunchCooperativeKernel, cudaGetDeviceProperties
+cudaFree, cudaMallocManaged, cudaDeviceSynchronize, cudaEventRecord, cudaLaunchCooperativeKernel, cudaEventDestroy, cudaEventElapsedTime, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaGetDeviceProperties, cudaEventCreate
 
 ## Dependencies needed to build/run
 [UVM](../../../README.md#uvm), [MBCG](../../../README.md#mbcg)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/conjugateGradientMultiBlockCG_vs2017.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/conjugateGradientMultiBlockCG_vs2017.vcxproj
index 0fa1a17e..d3fc2fdf 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/conjugateGradientMultiBlockCG_vs2017.vcxproj
+++ b/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/conjugateGradientMultiBlockCG_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/conjugateGradientMultiBlockCG.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/conjugateGradientMultiBlockCG_vs2019.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/conjugateGradientMultiBlockCG_vs2019.vcxproj
index 7301e032..6c064a95 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/conjugateGradientMultiBlockCG_vs2019.vcxproj
+++ b/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/conjugateGradientMultiBlockCG_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/conjugateGradientMultiBlockCG.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/conjugateGradientMultiBlockCG_vs2022.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/conjugateGradientMultiBlockCG_vs2022.vcxproj
index 57816f0b..0e4d81be 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/conjugateGradientMultiBlockCG_vs2022.vcxproj
+++ b/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/conjugateGradientMultiBlockCG_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/conjugateGradientMultiBlockCG.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/Makefile b/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/Makefile
index 06f2703e..31713181 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/Makefile
+++ b/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/Makefile
@@ -322,9 +322,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 61 70 72 75 80 86 87
+SMS ?= 61 70 72 75 80 86 87 90
 else
-SMS ?= 60 61 70 75 80 86
+SMS ?= 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/NsightEclipse.xml b/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/NsightEclipse.xml
index 7852487a..ef83507c 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/NsightEclipse.xml
@@ -8,22 +8,22 @@
     <flag>--std=c++11</flag>
   </cflags>
   <cuda_api_list>
-    <toolkit>cudaDeviceEnablePeerAccess</toolkit>
-    <toolkit>cudaMemset</toolkit>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaMallocManaged</toolkit>
-    <toolkit>cudaMemPrefetchAsync</toolkit>
     <toolkit>cudaHostAlloc</toolkit>
-    <toolkit>cudaOccupancyMaxActiveBlocksPerMultiprocessor</toolkit>
-    <toolkit>cudaStreamCreate</toolkit>
-    <toolkit>cudaGetDeviceCount</toolkit>
-    <toolkit>cudaFreeHost</toolkit>
-    <toolkit>cudaSetDevice</toolkit>
-    <toolkit>cudaDeviceCanAccessPeer</toolkit>
+    <toolkit>cudaMemPrefetchAsync</toolkit>
+    <toolkit>cudaFree</toolkit>
     <toolkit>cudaLaunchCooperativeKernel</toolkit>
-    <toolkit>cudaStreamSynchronize</toolkit>
-    <toolkit>cudaMemAdvise</toolkit>
+    <toolkit>cudaMallocManaged</toolkit>
+    <toolkit>cudaSetDevice</toolkit>
+    <toolkit>cudaGetDeviceCount</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaFreeHost</toolkit>
+    <toolkit>cudaMemset</toolkit>
+    <toolkit>cudaStreamCreate</toolkit>
+    <toolkit>cudaStreamSynchronize</toolkit>
+    <toolkit>cudaDeviceEnablePeerAccess</toolkit>
+    <toolkit>cudaMemAdvise</toolkit>
+    <toolkit>cudaOccupancyMaxActiveBlocksPerMultiprocessor</toolkit>
+    <toolkit>cudaDeviceCanAccessPeer</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample implements a conjugate gradient solver on multiple GPUs using Multi Device Cooperative Groups, also uses Unified Memory optimized using prefetching and usage hints.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -71,6 +71,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/README.md b/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/README.md
index e21a3507..9d77bf38 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/README.md
+++ b/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/README.md
@@ -10,7 +10,7 @@ Unified Memory, Linear Algebra, Cooperative Groups, MultiDevice Cooperative Grou
 
 ## Supported SM Architectures
 
-[SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaDeviceEnablePeerAccess, cudaMemset, cudaFree, cudaMallocManaged, cudaMemPrefetchAsync, cudaHostAlloc, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaStreamCreate, cudaGetDeviceCount, cudaFreeHost, cudaSetDevice, cudaDeviceCanAccessPeer, cudaLaunchCooperativeKernel, cudaStreamSynchronize, cudaMemAdvise, cudaGetDeviceProperties
+cudaHostAlloc, cudaMemPrefetchAsync, cudaFree, cudaLaunchCooperativeKernel, cudaMallocManaged, cudaSetDevice, cudaGetDeviceCount, cudaGetDeviceProperties, cudaFreeHost, cudaMemset, cudaStreamCreate, cudaStreamSynchronize, cudaDeviceEnablePeerAccess, cudaMemAdvise, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaDeviceCanAccessPeer
 
 ## Dependencies needed to build/run
 [UVM](../../../README.md#uvm), [MDCG](../../../README.md#mdcg), [CPP11](../../../README.md#cpp11)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG_vs2017.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG_vs2017.vcxproj
index 85930d36..b58051c3 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG_vs2017.vcxproj
+++ b/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/conjugateGradientMultiDeviceCG.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG_vs2019.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG_vs2019.vcxproj
index 4a5df0b8..b422370d 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG_vs2019.vcxproj
+++ b/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/conjugateGradientMultiDeviceCG.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG_vs2022.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG_vs2022.vcxproj
index 91ab39cc..41caff0d 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG_vs2022.vcxproj
+++ b/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/conjugateGradientMultiDeviceCG.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/conjugateGradientPrecond/NsightEclipse.xml b/Samples/4_CUDA_Libraries/conjugateGradientPrecond/NsightEclipse.xml
index 62d843c0..760d5de3 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradientPrecond/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/conjugateGradientPrecond/NsightEclipse.xml
@@ -3,10 +3,10 @@
 <entry>
   <name>conjugateGradientPrecond</name>
   <cuda_api_list>
-    <toolkit>cudaMemset</toolkit>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaMalloc</toolkit>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaMemset</toolkit>
+    <toolkit>cudaMalloc</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample implements a preconditioned conjugate gradient solver on GPU using CUBLAS and CUSPARSE library.]]></description>
@@ -57,6 +57,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/4_CUDA_Libraries/conjugateGradientPrecond/README.md b/Samples/4_CUDA_Libraries/conjugateGradientPrecond/README.md
index 6e1116d5..bded9817 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradientPrecond/README.md
+++ b/Samples/4_CUDA_Libraries/conjugateGradientPrecond/README.md
@@ -10,7 +10,7 @@ Linear Algebra, CUBLAS Library, CUSPARSE Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemset, cudaFree, cudaMalloc, cudaMemcpy, cudaGetDeviceProperties
+cudaMemcpy, cudaFree, cudaMemset, cudaMalloc, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [CUBLAS](../../../README.md#cublas), [CUSPARSE](../../../README.md#cusparse)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/conjugateGradientPrecond/conjugateGradientPrecond_vs2017.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradientPrecond/conjugateGradientPrecond_vs2017.vcxproj
index bd750e90..3af1df6d 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradientPrecond/conjugateGradientPrecond_vs2017.vcxproj
+++ b/Samples/4_CUDA_Libraries/conjugateGradientPrecond/conjugateGradientPrecond_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/conjugateGradientPrecond/conjugateGradientPrecond_vs2019.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradientPrecond/conjugateGradientPrecond_vs2019.vcxproj
index 593817cd..0721d9eb 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradientPrecond/conjugateGradientPrecond_vs2019.vcxproj
+++ b/Samples/4_CUDA_Libraries/conjugateGradientPrecond/conjugateGradientPrecond_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/conjugateGradientPrecond/conjugateGradientPrecond_vs2022.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradientPrecond/conjugateGradientPrecond_vs2022.vcxproj
index d368d236..e601f5fd 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradientPrecond/conjugateGradientPrecond_vs2022.vcxproj
+++ b/Samples/4_CUDA_Libraries/conjugateGradientPrecond/conjugateGradientPrecond_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/conjugateGradientUM/Makefile b/Samples/4_CUDA_Libraries/conjugateGradientUM/Makefile
index b16c5e68..8f72576d 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradientUM/Makefile
+++ b/Samples/4_CUDA_Libraries/conjugateGradientUM/Makefile
@@ -285,9 +285,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/4_CUDA_Libraries/conjugateGradientUM/NsightEclipse.xml b/Samples/4_CUDA_Libraries/conjugateGradientUM/NsightEclipse.xml
index 3e59ba5d..ca7258c9 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradientUM/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/conjugateGradientUM/NsightEclipse.xml
@@ -59,6 +59,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/4_CUDA_Libraries/conjugateGradientUM/README.md b/Samples/4_CUDA_Libraries/conjugateGradientUM/README.md
index bb3253f1..ac9fd252 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradientUM/README.md
+++ b/Samples/4_CUDA_Libraries/conjugateGradientUM/README.md
@@ -10,7 +10,7 @@ Unified Memory, Linear Algebra, CUBLAS Library, CUSPARSE Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -30,7 +30,7 @@ cudaFree, cudaMallocManaged, cudaDeviceSynchronize, cudaMalloc, cudaGetDevicePro
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/conjugateGradientUM/conjugateGradientUM_vs2017.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradientUM/conjugateGradientUM_vs2017.vcxproj
index 62f9d13d..4a8f1f5f 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradientUM/conjugateGradientUM_vs2017.vcxproj
+++ b/Samples/4_CUDA_Libraries/conjugateGradientUM/conjugateGradientUM_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/conjugateGradientUM.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/conjugateGradientUM/conjugateGradientUM_vs2019.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradientUM/conjugateGradientUM_vs2019.vcxproj
index a6541e3d..44fd5a52 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradientUM/conjugateGradientUM_vs2019.vcxproj
+++ b/Samples/4_CUDA_Libraries/conjugateGradientUM/conjugateGradientUM_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/conjugateGradientUM.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/conjugateGradientUM/conjugateGradientUM_vs2022.vcxproj b/Samples/4_CUDA_Libraries/conjugateGradientUM/conjugateGradientUM_vs2022.vcxproj
index 045228e5..05b5205c 100644
--- a/Samples/4_CUDA_Libraries/conjugateGradientUM/conjugateGradientUM_vs2022.vcxproj
+++ b/Samples/4_CUDA_Libraries/conjugateGradientUM/conjugateGradientUM_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/conjugateGradientUM.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/cuDLAErrorReporting/Makefile b/Samples/4_CUDA_Libraries/cuDLAErrorReporting/Makefile
index 19b23678..9c8f53fe 100644
--- a/Samples/4_CUDA_Libraries/cuDLAErrorReporting/Makefile
+++ b/Samples/4_CUDA_Libraries/cuDLAErrorReporting/Makefile
@@ -330,9 +330,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/4_CUDA_Libraries/cuDLAErrorReporting/NsightEclipse.xml b/Samples/4_CUDA_Libraries/cuDLAErrorReporting/NsightEclipse.xml
index fdf187ae..0d4cf217 100644
--- a/Samples/4_CUDA_Libraries/cuDLAErrorReporting/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/cuDLAErrorReporting/NsightEclipse.xml
@@ -6,14 +6,14 @@
     <flag>--std=c++11</flag>
   </cflags>
   <cuda_api_list>
+    <toolkit>cudaStreamCreateWithFlags</toolkit>
     <toolkit>cudaStreamDestroy</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaStreamSynchronize</toolkit>
-    <toolkit>cudaMemsetAsync</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaStreamCreateWithFlags</toolkit>
-    <toolkit>cudaSetDevice</toolkit>
     <toolkit>cudaGetErrorName</toolkit>
+    <toolkit>cudaSetDevice</toolkit>
+    <toolkit>cudaStreamSynchronize</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaMemsetAsync</toolkit>
     <toolkit>cudaMemcpyAsync</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates how DLA errors can be detected via CUDA.]]></description>
@@ -51,6 +51,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>aarch64</arch>
diff --git a/Samples/4_CUDA_Libraries/cuDLAErrorReporting/README.md b/Samples/4_CUDA_Libraries/cuDLAErrorReporting/README.md
index e4b50ac9..e18dc7d8 100644
--- a/Samples/4_CUDA_Libraries/cuDLAErrorReporting/README.md
+++ b/Samples/4_CUDA_Libraries/cuDLAErrorReporting/README.md
@@ -10,7 +10,7 @@ cuDLA, Data Parallel Algorithms, Image Processing
 
 ## Supported SM Architectures
 
-[SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ aarch64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaStreamDestroy, cudaFree, cudaStreamSynchronize, cudaMemsetAsync, cudaMalloc, cudaStreamCreateWithFlags, cudaSetDevice, cudaGetErrorName, cudaMemcpyAsync
+cudaStreamCreateWithFlags, cudaStreamDestroy, cudaFree, cudaGetErrorName, cudaSetDevice, cudaStreamSynchronize, cudaMalloc, cudaMemsetAsync, cudaMemcpyAsync
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/4_CUDA_Libraries/cuDLAHybridMode/Makefile b/Samples/4_CUDA_Libraries/cuDLAHybridMode/Makefile
index 70ba53d6..5cf7413a 100644
--- a/Samples/4_CUDA_Libraries/cuDLAHybridMode/Makefile
+++ b/Samples/4_CUDA_Libraries/cuDLAHybridMode/Makefile
@@ -330,9 +330,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/4_CUDA_Libraries/cuDLAHybridMode/NsightEclipse.xml b/Samples/4_CUDA_Libraries/cuDLAHybridMode/NsightEclipse.xml
index 07ca19c5..d59cdec6 100644
--- a/Samples/4_CUDA_Libraries/cuDLAHybridMode/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/cuDLAHybridMode/NsightEclipse.xml
@@ -6,14 +6,14 @@
     <flag>--std=c++11</flag>
   </cflags>
   <cuda_api_list>
+    <toolkit>cudaStreamCreateWithFlags</toolkit>
     <toolkit>cudaStreamDestroy</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaStreamSynchronize</toolkit>
-    <toolkit>cudaMemsetAsync</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaStreamCreateWithFlags</toolkit>
-    <toolkit>cudaSetDevice</toolkit>
     <toolkit>cudaGetErrorName</toolkit>
+    <toolkit>cudaSetDevice</toolkit>
+    <toolkit>cudaStreamSynchronize</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaMemsetAsync</toolkit>
     <toolkit>cudaMemcpyAsync</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates cuDLA hybrid mode wherein DLA can be programmed using CUDA.]]></description>
@@ -51,6 +51,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>aarch64</arch>
diff --git a/Samples/4_CUDA_Libraries/cuDLAHybridMode/README.md b/Samples/4_CUDA_Libraries/cuDLAHybridMode/README.md
index 2f4559bb..12799c18 100644
--- a/Samples/4_CUDA_Libraries/cuDLAHybridMode/README.md
+++ b/Samples/4_CUDA_Libraries/cuDLAHybridMode/README.md
@@ -10,7 +10,7 @@ cuDLA, Data Parallel Algorithms, Image Processing
 
 ## Supported SM Architectures
 
-[SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ aarch64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaStreamDestroy, cudaFree, cudaStreamSynchronize, cudaMemsetAsync, cudaMalloc, cudaStreamCreateWithFlags, cudaSetDevice, cudaGetErrorName, cudaMemcpyAsync
+cudaStreamCreateWithFlags, cudaStreamDestroy, cudaFree, cudaGetErrorName, cudaSetDevice, cudaStreamSynchronize, cudaMalloc, cudaMemsetAsync, cudaMemcpyAsync
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/4_CUDA_Libraries/cuDLAStandaloneMode/Makefile b/Samples/4_CUDA_Libraries/cuDLAStandaloneMode/Makefile
index 4893fee9..76bfe834 100644
--- a/Samples/4_CUDA_Libraries/cuDLAStandaloneMode/Makefile
+++ b/Samples/4_CUDA_Libraries/cuDLAStandaloneMode/Makefile
@@ -333,9 +333,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/4_CUDA_Libraries/cuDLAStandaloneMode/NsightEclipse.xml b/Samples/4_CUDA_Libraries/cuDLAStandaloneMode/NsightEclipse.xml
index b9db9e0b..7f3ef809 100644
--- a/Samples/4_CUDA_Libraries/cuDLAStandaloneMode/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/cuDLAStandaloneMode/NsightEclipse.xml
@@ -45,6 +45,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>aarch64</arch>
diff --git a/Samples/4_CUDA_Libraries/cuDLAStandaloneMode/README.md b/Samples/4_CUDA_Libraries/cuDLAStandaloneMode/README.md
index 6ca250ba..21cdfb8f 100644
--- a/Samples/4_CUDA_Libraries/cuDLAStandaloneMode/README.md
+++ b/Samples/4_CUDA_Libraries/cuDLAStandaloneMode/README.md
@@ -10,7 +10,7 @@ cuDLA, Data Parallel Algorithms, Image Processing
 
 ## Supported SM Architectures
 
-[SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -27,7 +27,7 @@ aarch64
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/cuDLAStandaloneMode/findnvsci.mk b/Samples/4_CUDA_Libraries/cuDLAStandaloneMode/findnvsci.mk
index 1934e138..0d6d157c 100644
--- a/Samples/4_CUDA_Libraries/cuDLAStandaloneMode/findnvsci.mk
+++ b/Samples/4_CUDA_Libraries/cuDLAStandaloneMode/findnvsci.mk
@@ -56,6 +56,7 @@ ifeq ("$(TARGET_OS)","linux")
     RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
     CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -101,6 +102,11 @@ ifeq ("$(TARGET_OS)","linux")
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
+    ifeq ("$(KYLIN)","0")
+      GLPATH    ?= /usr/lib64/nvidia
+      GLLINK    ?= -L/usr/lib64/nvidia
+      DFLT_PATH ?= /usr/lib64
+    endif
 
   NVSCIBUFLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libnvscibuf.so    -print 2>/dev/null)
   NVSCISYNCLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libnvscisync.so    -print 2>/dev/null)
diff --git a/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/NsightEclipse.xml b/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/NsightEclipse.xml
index ba8ad201..9405b394 100644
--- a/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/NsightEclipse.xml
@@ -5,13 +5,13 @@
   <cuda_api_list>
     <driver>cuDoubleComplex</driver>
     <driver>cuComplex</driver>
-    <toolkit>cudaMemset</toolkit>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaStreamDestroy</toolkit>
-    <toolkit>cudaStreamCreate</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaMalloc</toolkit>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaStreamDestroy</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaMemset</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaStreamCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[A CUDA Sample that demonstrates cuSolverDN's LU, QR and Cholesky factorization.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -68,6 +68,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/README.md b/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/README.md
index 92210b8f..0b311943 100644
--- a/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/README.md
+++ b/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/README.md
@@ -10,7 +10,7 @@ Linear Algebra, CUSOLVER Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -26,14 +26,14 @@ x86_64, ppc64le, aarch64
 cuDoubleComplex, cuComplex
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemset, cudaFree, cudaStreamDestroy, cudaStreamCreate, cudaDeviceSynchronize, cudaMalloc, cudaMemcpy
+cudaMemcpy, cudaStreamDestroy, cudaFree, cudaDeviceSynchronize, cudaMemset, cudaMalloc, cudaStreamCreate
 
 ## Dependencies needed to build/run
 [CUSOLVER](../../../README.md#cusolver), [CUBLAS](../../../README.md#cublas), [CUSPARSE](../../../README.md#cusparse)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/cuSolverDn_LinearSolver_vs2017.vcxproj b/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/cuSolverDn_LinearSolver_vs2017.vcxproj
index 90d7b869..cd859b9b 100644
--- a/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/cuSolverDn_LinearSolver_vs2017.vcxproj
+++ b/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/cuSolverDn_LinearSolver_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -109,6 +109,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/cuSolverDn_LinearSolver_vs2019.vcxproj b/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/cuSolverDn_LinearSolver_vs2019.vcxproj
index 2b63d7d1..6155be1a 100644
--- a/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/cuSolverDn_LinearSolver_vs2019.vcxproj
+++ b/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/cuSolverDn_LinearSolver_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/cuSolverDn_LinearSolver_vs2022.vcxproj b/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/cuSolverDn_LinearSolver_vs2022.vcxproj
index 1f123af8..98448924 100644
--- a/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/cuSolverDn_LinearSolver_vs2022.vcxproj
+++ b/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/cuSolverDn_LinearSolver_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/cuSolverRf/Makefile b/Samples/4_CUDA_Libraries/cuSolverRf/Makefile
index 27824bc2..92fdc984 100644
--- a/Samples/4_CUDA_Libraries/cuSolverRf/Makefile
+++ b/Samples/4_CUDA_Libraries/cuSolverRf/Makefile
@@ -283,9 +283,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/4_CUDA_Libraries/cuSolverRf/NsightEclipse.xml b/Samples/4_CUDA_Libraries/cuSolverRf/NsightEclipse.xml
index 2cf3f041..84c56c89 100644
--- a/Samples/4_CUDA_Libraries/cuSolverRf/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/cuSolverRf/NsightEclipse.xml
@@ -3,15 +3,15 @@
 <entry>
   <name>cuSolverRf</name>
   <cuda_api_list>
+    <driver>cuGet</driver>
     <driver>cuDoubleComplex</driver>
     <driver>cuComplex</driver>
-    <driver>cuGet</driver>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaStreamDestroy</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaStreamCreate</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaStreamCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[A CUDA Sample that demonstrates cuSolver's refactorization library - CUSOLVERRF.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -67,6 +67,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/4_CUDA_Libraries/cuSolverRf/README.md b/Samples/4_CUDA_Libraries/cuSolverRf/README.md
index d0a99cf4..c268cf07 100644
--- a/Samples/4_CUDA_Libraries/cuSolverRf/README.md
+++ b/Samples/4_CUDA_Libraries/cuSolverRf/README.md
@@ -10,7 +10,7 @@ Linear Algebra, CUSOLVER Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,17 +23,17 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
-cuDoubleComplex, cuComplex, cuGet
+cuGet, cuDoubleComplex, cuComplex
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaStreamDestroy, cudaFree, cudaStreamCreate, cudaDeviceSynchronize, cudaMalloc, cudaMemcpy
+cudaMemcpy, cudaStreamDestroy, cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaStreamCreate
 
 ## Dependencies needed to build/run
 [CUSOLVER](../../../README.md#cusolver), [CUBLAS](../../../README.md#cublas), [CUSPARSE](../../../README.md#cusparse)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/cuSolverRf/cuSolverRf_vs2017.vcxproj b/Samples/4_CUDA_Libraries/cuSolverRf/cuSolverRf_vs2017.vcxproj
index fc5a4d2f..bcc7a6a4 100644
--- a/Samples/4_CUDA_Libraries/cuSolverRf/cuSolverRf_vs2017.vcxproj
+++ b/Samples/4_CUDA_Libraries/cuSolverRf/cuSolverRf_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/cuSolverRf.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -109,6 +109,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/cuSolverRf/cuSolverRf_vs2019.vcxproj b/Samples/4_CUDA_Libraries/cuSolverRf/cuSolverRf_vs2019.vcxproj
index 02802989..16948fcd 100644
--- a/Samples/4_CUDA_Libraries/cuSolverRf/cuSolverRf_vs2019.vcxproj
+++ b/Samples/4_CUDA_Libraries/cuSolverRf/cuSolverRf_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/cuSolverRf.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/cuSolverRf/cuSolverRf_vs2022.vcxproj b/Samples/4_CUDA_Libraries/cuSolverRf/cuSolverRf_vs2022.vcxproj
index 9cd7669f..6de3db9b 100644
--- a/Samples/4_CUDA_Libraries/cuSolverRf/cuSolverRf_vs2022.vcxproj
+++ b/Samples/4_CUDA_Libraries/cuSolverRf/cuSolverRf_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/cuSolverRf.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/Makefile b/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/Makefile
index 59b0c3d0..04cdc6ea 100644
--- a/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/Makefile
+++ b/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/Makefile
@@ -283,9 +283,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/NsightEclipse.xml b/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/NsightEclipse.xml
index cc3a57ad..f6b92a54 100644
--- a/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/NsightEclipse.xml
@@ -7,9 +7,9 @@
     <driver>cuComplex</driver>
     <toolkit>cudaStreamDestroy</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaStreamCreate</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaStreamCreate</toolkit>
     <toolkit>cudaMemcpyAsync</toolkit>
   </cuda_api_list>
   <description><![CDATA[A CUDA Sample that demonstrates cuSolverSP's LU, QR and Cholesky factorization.]]></description>
@@ -68,6 +68,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/README.md b/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/README.md
index 2ac87f31..25ce2865 100644
--- a/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/README.md
+++ b/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/README.md
@@ -10,7 +10,7 @@ Linear Algebra, CUSOLVER Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -26,14 +26,14 @@ x86_64, ppc64le, armv7l
 cuDoubleComplex, cuComplex
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaStreamDestroy, cudaFree, cudaStreamCreate, cudaDeviceSynchronize, cudaMalloc, cudaMemcpyAsync
+cudaStreamDestroy, cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaStreamCreate, cudaMemcpyAsync
 
 ## Dependencies needed to build/run
 [CUSOLVER](../../../README.md#cusolver), [CUSPARSE](../../../README.md#cusparse)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver_vs2017.vcxproj b/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver_vs2017.vcxproj
index 78512bbd..9978ec3e 100644
--- a/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver_vs2017.vcxproj
+++ b/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/cuSolverSp_LinearSolver.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -109,6 +109,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver_vs2019.vcxproj b/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver_vs2019.vcxproj
index fe619501..a7aef7dd 100644
--- a/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver_vs2019.vcxproj
+++ b/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/cuSolverSp_LinearSolver.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver_vs2022.vcxproj b/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver_vs2022.vcxproj
index 0d22534d..d6a6ef40 100644
--- a/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver_vs2022.vcxproj
+++ b/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/cuSolverSp_LinearSolver.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/Makefile b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/Makefile
index e80902e4..649da6f8 100644
--- a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/Makefile
+++ b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/Makefile
@@ -283,9 +283,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/NsightEclipse.xml b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/NsightEclipse.xml
index 32bfa4d3..65fa8556 100644
--- a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/NsightEclipse.xml
@@ -5,11 +5,11 @@
   <cuda_api_list>
     <driver>cuDoubleComplex</driver>
     <driver>cuComplex</driver>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaStreamDestroy</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaStreamCreate</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaStreamCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[A CUDA Sample that demonstrates Cholesky factorization using cuSolverSP's low level APIs.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -59,6 +59,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/README.md b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/README.md
index a992f9ef..3cf4112f 100644
--- a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/README.md
+++ b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/README.md
@@ -10,7 +10,7 @@ Linear Algebra, CUSOLVER Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -26,14 +26,14 @@ x86_64, ppc64le, armv7l
 cuDoubleComplex, cuComplex
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaStreamDestroy, cudaFree, cudaStreamCreate, cudaMalloc, cudaMemcpy
+cudaMemcpy, cudaStreamDestroy, cudaFree, cudaMalloc, cudaStreamCreate
 
 ## Dependencies needed to build/run
 [CUSOLVER](../../../README.md#cusolver), [CUSPARSE](../../../README.md#cusparse)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/cuSolverSp_LowlevelCholesky_vs2017.vcxproj b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/cuSolverSp_LowlevelCholesky_vs2017.vcxproj
index d04553b8..d2c8031e 100644
--- a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/cuSolverSp_LowlevelCholesky_vs2017.vcxproj
+++ b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/cuSolverSp_LowlevelCholesky_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/cuSolverSp_LowlevelCholesky.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -109,6 +109,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/cuSolverSp_LowlevelCholesky_vs2019.vcxproj b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/cuSolverSp_LowlevelCholesky_vs2019.vcxproj
index 3b686a78..2703da3a 100644
--- a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/cuSolverSp_LowlevelCholesky_vs2019.vcxproj
+++ b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/cuSolverSp_LowlevelCholesky_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/cuSolverSp_LowlevelCholesky.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/cuSolverSp_LowlevelCholesky_vs2022.vcxproj b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/cuSolverSp_LowlevelCholesky_vs2022.vcxproj
index 94e8d403..b85749bf 100644
--- a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/cuSolverSp_LowlevelCholesky_vs2022.vcxproj
+++ b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/cuSolverSp_LowlevelCholesky_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/cuSolverSp_LowlevelCholesky.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/Makefile b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/Makefile
index 5e3384bc..2e7d1c9f 100644
--- a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/Makefile
+++ b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/Makefile
@@ -283,9 +283,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/NsightEclipse.xml b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/NsightEclipse.xml
index 3e3dc5c4..51bab24a 100644
--- a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/NsightEclipse.xml
@@ -3,14 +3,14 @@
 <entry>
   <name>cuSolverSp_LowlevelQR</name>
   <cuda_api_list>
+    <driver>cuGet</driver>
     <driver>cuDoubleComplex</driver>
     <driver>cuComplex</driver>
-    <driver>cuGet</driver>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaStreamDestroy</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaStreamCreate</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaStreamCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[A CUDA Sample that demonstrates QR factorization using cuSolverSP's low level APIs.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -60,6 +60,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/README.md b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/README.md
index eae84e4d..df5f2a84 100644
--- a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/README.md
+++ b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/README.md
@@ -10,7 +10,7 @@ Linear Algebra, CUSOLVER Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,17 +23,17 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
-cuDoubleComplex, cuComplex, cuGet
+cuGet, cuDoubleComplex, cuComplex
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaStreamDestroy, cudaFree, cudaStreamCreate, cudaMalloc, cudaMemcpy
+cudaMemcpy, cudaStreamDestroy, cudaFree, cudaMalloc, cudaStreamCreate
 
 ## Dependencies needed to build/run
 [CUSOLVER](../../../README.md#cusolver), [CUSPARSE](../../../README.md#cusparse)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/cuSolverSp_LowlevelQR_vs2017.vcxproj b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/cuSolverSp_LowlevelQR_vs2017.vcxproj
index b62c633e..b25aa37d 100644
--- a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/cuSolverSp_LowlevelQR_vs2017.vcxproj
+++ b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/cuSolverSp_LowlevelQR_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/cuSolverSp_LowlevelQR.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -109,6 +109,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/cuSolverSp_LowlevelQR_vs2019.vcxproj b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/cuSolverSp_LowlevelQR_vs2019.vcxproj
index 5ab45d7d..af967c45 100644
--- a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/cuSolverSp_LowlevelQR_vs2019.vcxproj
+++ b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/cuSolverSp_LowlevelQR_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/cuSolverSp_LowlevelQR.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/cuSolverSp_LowlevelQR_vs2022.vcxproj b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/cuSolverSp_LowlevelQR_vs2022.vcxproj
index d8ee669f..bb98790f 100644
--- a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/cuSolverSp_LowlevelQR_vs2022.vcxproj
+++ b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/cuSolverSp_LowlevelQR_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/cuSolverSp_LowlevelQR.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/cudaNvSci/Makefile b/Samples/4_CUDA_Libraries/cudaNvSci/Makefile
index dd8e7bce..f6561b1b 100644
--- a/Samples/4_CUDA_Libraries/cudaNvSci/Makefile
+++ b/Samples/4_CUDA_Libraries/cudaNvSci/Makefile
@@ -327,9 +327,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/4_CUDA_Libraries/cudaNvSci/NsightEclipse.xml b/Samples/4_CUDA_Libraries/cudaNvSci/NsightEclipse.xml
index a8659d17..0995196e 100644
--- a/Samples/4_CUDA_Libraries/cudaNvSci/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/cudaNvSci/NsightEclipse.xml
@@ -7,39 +7,39 @@
   </cflags>
   <cuda_api_list>
     <driver>cuDeviceGetUuid</driver>
-    <toolkit>cudaGetMipmappedArrayLevel</toolkit>
-    <toolkit>cudaImportNvSciImage</toolkit>
-    <toolkit>cudaImportExternalSemaphore</toolkit>
-    <toolkit>cudaNvSciApp</toolkit>
-    <toolkit>cudaStreamCreateWithFlags</toolkit>
-    <toolkit>cudaExternalMemoryGetMappedMipmappedArray</toolkit>
-    <toolkit>cudaNvSciWait</toolkit>
-    <toolkit>cudaDestroyExternalMemory</toolkit>
-    <toolkit>cudaMemcpyAsync</toolkit>
-    <toolkit>cudaStreamDestroy</toolkit>
-    <toolkit>cudaSignalExternalSemaphoresAsync</toolkit>
-    <toolkit>cudaDeviceGetNvSciSyncAttributes</toolkit>
-    <toolkit>cudaFreeMipmappedArray</toolkit>
-    <toolkit>cudaMallocHost</toolkit>
-    <toolkit>cudaNvSci</toolkit>
-    <toolkit>cudaImportExternalMemory</toolkit>
-    <toolkit>cudaSetDevice</toolkit>
-    <toolkit>cudaImportNvSciRawBuf</toolkit>
-    <toolkit>cudaImportNvSciSemaphore</toolkit>
-    <toolkit>cudaGetDeviceCount</toolkit>
-    <toolkit>cudaDestroyTextureObject</toolkit>
-    <toolkit>cudaDeviceGetAttribute</toolkit>
-    <toolkit>cudaDestroyExternalSemaphore</toolkit>
-    <toolkit>cudaStreamSynchronize</toolkit>
-    <toolkit>cudaNvSciSignal</toolkit>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaDeviceId</toolkit>
     <toolkit>cudaExternalMemoryGetMappedBuffer</toolkit>
+    <toolkit>cudaImportExternalSemaphore</toolkit>
+    <toolkit>cudaDeviceGetAttribute</toolkit>
+    <toolkit>cudaNvSciSignal</toolkit>
+    <toolkit>cudaGetMipmappedArrayLevel</toolkit>
+    <toolkit>cudaImportNvSciRawBuf</toolkit>
+    <toolkit>cudaSetDevice</toolkit>
+    <toolkit>cudaImportNvSciImage</toolkit>
+    <toolkit>cudaNvSciApp</toolkit>
+    <toolkit>cudaDeviceId</toolkit>
+    <toolkit>cudaMallocHost</toolkit>
+    <toolkit>cudaSignalExternalSemaphoresAsync</toolkit>
     <toolkit>cudaCreateTextureObject</toolkit>
     <toolkit>cudaFreeHost</toolkit>
+    <toolkit>cudaNvSci</toolkit>
+    <toolkit>cudaNvSciWait</toolkit>
+    <toolkit>cudaGetDeviceCount</toolkit>
+    <toolkit>cudaMemcpyAsync</toolkit>
+    <toolkit>cudaStreamCreateWithFlags</toolkit>
+    <toolkit>cudaExternalMemoryGetMappedMipmappedArray</toolkit>
+    <toolkit>cudaStreamDestroy</toolkit>
+    <toolkit>cudaDeviceGetNvSciSyncAttributes</toolkit>
+    <toolkit>cudaDestroyTextureObject</toolkit>
+    <toolkit>cudaDestroyExternalMemory</toolkit>
+    <toolkit>cudaImportExternalMemory</toolkit>
+    <toolkit>cudaDestroyExternalSemaphore</toolkit>
+    <toolkit>cudaFreeMipmappedArray</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaStreamSynchronize</toolkit>
     <toolkit>cudaWaitExternalSemaphoresAsync</toolkit>
+    <toolkit>cudaImportNvSciSemaphore</toolkit>
   </cuda_api_list>
-  <description><![CDATA[This sample demonstrates CUDA-NvSciBuf/NvSciSync Interop. Two CPU threads import the NvSciBuf and NvSciSync into CUDA to perform two image processing algorithms on a ppm image - image rotation in 1st thread &amp;amp;amp;amp;amp;amp;amp;amp;amp;amp; rgba to grayscale conversion of rotated image in 2nd thread. Currently only supported on Ubuntu 18.04]]></description>
+  <description><![CDATA[This sample demonstrates CUDA-NvSciBuf/NvSciSync Interop. Two CPU threads import the NvSciBuf and NvSciSync into CUDA to perform two image processing algorithms on a ppm image - image rotation in 1st thread and rgba to grayscale conversion of rotated image in 2nd thread. Currently only supported on Ubuntu 18.04]]></description>
   <devicecompilation>whole</devicecompilation>
   <includepaths>
     <path>./</path>
@@ -80,6 +80,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <platform>aarch64</platform>
diff --git a/Samples/4_CUDA_Libraries/cudaNvSci/README.md b/Samples/4_CUDA_Libraries/cudaNvSci/README.md
index baac35c6..2e12e227 100644
--- a/Samples/4_CUDA_Libraries/cudaNvSci/README.md
+++ b/Samples/4_CUDA_Libraries/cudaNvSci/README.md
@@ -2,7 +2,7 @@
 
 ## Description
 
-This sample demonstrates CUDA-NvSciBuf/NvSciSync Interop. Two CPU threads import the NvSciBuf and NvSciSync into CUDA to perform two image processing algorithms on a ppm image - image rotation in 1st thread &amp;amp;amp;amp;amp;amp;amp;amp;amp;amp; rgba to grayscale conversion of rotated image in 2nd thread. Currently only supported on Ubuntu 18.04
+This sample demonstrates CUDA-NvSciBuf/NvSciSync Interop. Two CPU threads import the NvSciBuf and NvSciSync into CUDA to perform two image processing algorithms on a ppm image - image rotation in 1st thread and rgba to grayscale conversion of rotated image in 2nd thread. Currently only supported on Ubuntu 18.04
 
 ## Key Concepts
 
@@ -10,7 +10,7 @@ CUDA NvSci Interop, Data Parallel Algorithms, Image Processing
 
 ## Supported SM Architectures
 
-[SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -26,14 +26,14 @@ x86_64, aarch64
 cuDeviceGetUuid
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaGetMipmappedArrayLevel, cudaImportNvSciImage, cudaImportExternalSemaphore, cudaNvSciApp, cudaStreamCreateWithFlags, cudaExternalMemoryGetMappedMipmappedArray, cudaNvSciWait, cudaDestroyExternalMemory, cudaMemcpyAsync, cudaStreamDestroy, cudaSignalExternalSemaphoresAsync, cudaDeviceGetNvSciSyncAttributes, cudaFreeMipmappedArray, cudaMallocHost, cudaNvSci, cudaImportExternalMemory, cudaSetDevice, cudaImportNvSciRawBuf, cudaImportNvSciSemaphore, cudaGetDeviceCount, cudaDestroyTextureObject, cudaDeviceGetAttribute, cudaDestroyExternalSemaphore, cudaStreamSynchronize, cudaNvSciSignal, cudaFree, cudaDeviceId, cudaExternalMemoryGetMappedBuffer, cudaCreateTextureObject, cudaFreeHost, cudaWaitExternalSemaphoresAsync
+cudaExternalMemoryGetMappedBuffer, cudaImportExternalSemaphore, cudaDeviceGetAttribute, cudaNvSciSignal, cudaGetMipmappedArrayLevel, cudaImportNvSciRawBuf, cudaSetDevice, cudaImportNvSciImage, cudaNvSciApp, cudaDeviceId, cudaMallocHost, cudaSignalExternalSemaphoresAsync, cudaCreateTextureObject, cudaFreeHost, cudaNvSci, cudaNvSciWait, cudaGetDeviceCount, cudaMemcpyAsync, cudaStreamCreateWithFlags, cudaExternalMemoryGetMappedMipmappedArray, cudaStreamDestroy, cudaDeviceGetNvSciSyncAttributes, cudaDestroyTextureObject, cudaDestroyExternalMemory, cudaImportExternalMemory, cudaDestroyExternalSemaphore, cudaFreeMipmappedArray, cudaFree, cudaStreamSynchronize, cudaWaitExternalSemaphoresAsync, cudaImportNvSciSemaphore
 
 ## Dependencies needed to build/run
 [NVSCI](../../../README.md#nvsci)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/cudaNvSci/findnvsci.mk b/Samples/4_CUDA_Libraries/cudaNvSci/findnvsci.mk
index 1934e138..0d6d157c 100644
--- a/Samples/4_CUDA_Libraries/cudaNvSci/findnvsci.mk
+++ b/Samples/4_CUDA_Libraries/cudaNvSci/findnvsci.mk
@@ -56,6 +56,7 @@ ifeq ("$(TARGET_OS)","linux")
     RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
     CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -101,6 +102,11 @@ ifeq ("$(TARGET_OS)","linux")
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
+    ifeq ("$(KYLIN)","0")
+      GLPATH    ?= /usr/lib64/nvidia
+      GLLINK    ?= -L/usr/lib64/nvidia
+      DFLT_PATH ?= /usr/lib64
+    endif
 
   NVSCIBUFLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libnvscibuf.so    -print 2>/dev/null)
   NVSCISYNCLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libnvscisync.so    -print 2>/dev/null)
diff --git a/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/Makefile b/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/Makefile
index 390fb6f2..f9a79cde 100644
--- a/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/Makefile
+++ b/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/Makefile
@@ -338,9 +338,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/NsightEclipse.xml b/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/NsightEclipse.xml
index 31ebd06c..0b3b44cb 100644
--- a/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/NsightEclipse.xml
@@ -7,32 +7,32 @@
   </cflags>
   <cuda_api_list>
     <driver>cuDeviceGetUuid</driver>
-    <toolkit>cudaGetMipmappedArrayLevel</toolkit>
-    <toolkit>cudaImportNvSciImage</toolkit>
     <toolkit>cudaImportExternalSemaphore</toolkit>
+    <toolkit>cudaGetMipmappedArrayLevel</toolkit>
+    <toolkit>cudaSetDevice</toolkit>
+    <toolkit>cudaDestroySurfaceObject</toolkit>
+    <toolkit>cudaCreateSurfaceObject</toolkit>
+    <toolkit>cudaImportNvSciImage</toolkit>
+    <toolkit>cudaCreateChannelDesc</toolkit>
+    <toolkit>cudaMallocHost</toolkit>
+    <toolkit>cudaSignalExternalSemaphoresAsync</toolkit>
+    <toolkit>cudaFreeHost</toolkit>
+    <toolkit>cudaMemcpyAsync</toolkit>
     <toolkit>cudaStreamCreateWithFlags</toolkit>
     <toolkit>cudaExternalMemoryGetMappedMipmappedArray</toolkit>
-    <toolkit>cudaDestroyExternalMemory</toolkit>
-    <toolkit>cudaMemcpyAsync</toolkit>
-    <toolkit>cudaStreamDestroy</toolkit>
-    <toolkit>cudaSignalExternalSemaphoresAsync</toolkit>
-    <toolkit>cudaDeviceGetNvSciSyncAttributes</toolkit>
-    <toolkit>cudaFreeMipmappedArray</toolkit>
-    <toolkit>cudaMallocHost</toolkit>
-    <toolkit>cudaDestroySurfaceObject</toolkit>
-    <toolkit>cudaImportExternalMemory</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaSetDevice</toolkit>
-    <toolkit>cudaDestroyExternalSemaphore</toolkit>
-    <toolkit>cudaCreateChannelDesc</toolkit>
-    <toolkit>cudaStreamSynchronize</toolkit>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaFreeArray</toolkit>
-    <toolkit>cudaCreateSurfaceObject</toolkit>
-    <toolkit>cudaFreeHost</toolkit>
     <toolkit>cudaMallocArray</toolkit>
-    <toolkit>cudaWaitExternalSemaphoresAsync</toolkit>
+    <toolkit>cudaFreeArray</toolkit>
+    <toolkit>cudaStreamDestroy</toolkit>
+    <toolkit>cudaDeviceGetNvSciSyncAttributes</toolkit>
+    <toolkit>cudaDestroyExternalMemory</toolkit>
+    <toolkit>cudaImportExternalMemory</toolkit>
+    <toolkit>cudaDestroyExternalSemaphore</toolkit>
+    <toolkit>cudaFreeMipmappedArray</toolkit>
     <toolkit>cudaImportNvSciSync</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaStreamSynchronize</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaWaitExternalSemaphoresAsync</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates CUDA-NvMedia interop via NvSciBuf/NvSciSync APIs. Note that this sample only supports cross build from x86_64 to aarch64, aarch64 native build is not supported. For detailed workflow of the sample please check cudaNvSciNvMedia_Readme.pdf in the sample directory.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -77,6 +77,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>aarch64</arch>
diff --git a/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/README.md b/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/README.md
index 2ef3f769..9d1cd136 100644
--- a/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/README.md
+++ b/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/README.md
@@ -10,7 +10,7 @@ CUDA NvSci Interop, Data Parallel Algorithms, Image Processing
 
 ## Supported SM Architectures
 
-[SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -26,14 +26,14 @@ aarch64
 cuDeviceGetUuid
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaGetMipmappedArrayLevel, cudaImportNvSciImage, cudaImportExternalSemaphore, cudaStreamCreateWithFlags, cudaExternalMemoryGetMappedMipmappedArray, cudaDestroyExternalMemory, cudaMemcpyAsync, cudaStreamDestroy, cudaSignalExternalSemaphoresAsync, cudaDeviceGetNvSciSyncAttributes, cudaFreeMipmappedArray, cudaMallocHost, cudaDestroySurfaceObject, cudaImportExternalMemory, cudaMalloc, cudaSetDevice, cudaDestroyExternalSemaphore, cudaCreateChannelDesc, cudaStreamSynchronize, cudaFree, cudaFreeArray, cudaCreateSurfaceObject, cudaFreeHost, cudaMallocArray, cudaWaitExternalSemaphoresAsync, cudaImportNvSciSync
+cudaImportExternalSemaphore, cudaGetMipmappedArrayLevel, cudaSetDevice, cudaDestroySurfaceObject, cudaCreateSurfaceObject, cudaImportNvSciImage, cudaCreateChannelDesc, cudaMallocHost, cudaSignalExternalSemaphoresAsync, cudaFreeHost, cudaMemcpyAsync, cudaStreamCreateWithFlags, cudaExternalMemoryGetMappedMipmappedArray, cudaMallocArray, cudaFreeArray, cudaStreamDestroy, cudaDeviceGetNvSciSyncAttributes, cudaDestroyExternalMemory, cudaImportExternalMemory, cudaDestroyExternalSemaphore, cudaFreeMipmappedArray, cudaImportNvSciSync, cudaFree, cudaStreamSynchronize, cudaMalloc, cudaWaitExternalSemaphoresAsync
 
 ## Dependencies needed to build/run
 [NVSCI](../../../README.md#nvsci), [NvMedia](../../../README.md#nvmedia)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/findnvmedia.mk b/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/findnvmedia.mk
index 7b8cd1b6..23cfcd53 100644
--- a/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/findnvmedia.mk
+++ b/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/findnvmedia.mk
@@ -56,6 +56,7 @@ ifeq ("$(TARGET_OS)","linux")
     RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
     CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -101,6 +102,11 @@ ifeq ("$(TARGET_OS)","linux")
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
+    ifeq ("$(KYLIN)","0")
+      GLPATH    ?= /usr/lib64/nvidia
+      GLLINK    ?= -L/usr/lib64/nvidia
+      DFLT_PATH ?= /usr/lib64
+    endif
 
   NVMEDIALIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libnvmedia.so -print 2>/dev/null)
 
diff --git a/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/findnvsci.mk b/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/findnvsci.mk
index 1934e138..0d6d157c 100644
--- a/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/findnvsci.mk
+++ b/Samples/4_CUDA_Libraries/cudaNvSciNvMedia/findnvsci.mk
@@ -56,6 +56,7 @@ ifeq ("$(TARGET_OS)","linux")
     RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
     CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -101,6 +102,11 @@ ifeq ("$(TARGET_OS)","linux")
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
+    ifeq ("$(KYLIN)","0")
+      GLPATH    ?= /usr/lib64/nvidia
+      GLLINK    ?= -L/usr/lib64/nvidia
+      DFLT_PATH ?= /usr/lib64
+    endif
 
   NVSCIBUFLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libnvscibuf.so    -print 2>/dev/null)
   NVSCISYNCLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libnvscisync.so    -print 2>/dev/null)
diff --git a/Samples/4_CUDA_Libraries/freeImageInteropNPP/NsightEclipse.xml b/Samples/4_CUDA_Libraries/freeImageInteropNPP/NsightEclipse.xml
index 0296542c..33f80755 100644
--- a/Samples/4_CUDA_Libraries/freeImageInteropNPP/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/freeImageInteropNPP/NsightEclipse.xml
@@ -3,11 +3,11 @@
 <entry>
   <name>freeImageInteropNPP</name>
   <cuda_api_list>
-    <toolkit>cudaGetDeviceCount</toolkit>
-    <toolkit>cudaDriverGetVersion</toolkit>
-    <toolkit>cudaDeviceInit</toolkit>
-    <toolkit>cudaSetDevice</toolkit>
     <toolkit>cudaRuntimeGetVersion</toolkit>
+    <toolkit>cudaSetDevice</toolkit>
+    <toolkit>cudaGetDeviceCount</toolkit>
+    <toolkit>cudaDeviceInit</toolkit>
+    <toolkit>cudaDriverGetVersion</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[A simple CUDA Sample demonstrate how to use FreeImage library with NPP.]]></description>
@@ -65,6 +65,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/4_CUDA_Libraries/freeImageInteropNPP/README.md b/Samples/4_CUDA_Libraries/freeImageInteropNPP/README.md
index 20361fe8..96e072b6 100644
--- a/Samples/4_CUDA_Libraries/freeImageInteropNPP/README.md
+++ b/Samples/4_CUDA_Libraries/freeImageInteropNPP/README.md
@@ -10,7 +10,7 @@ Performance Strategies, Image Processing, NPP Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaGetDeviceCount, cudaDriverGetVersion, cudaDeviceInit, cudaSetDevice, cudaRuntimeGetVersion, cudaGetDeviceProperties
+cudaRuntimeGetVersion, cudaSetDevice, cudaGetDeviceCount, cudaDeviceInit, cudaDriverGetVersion, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [FreeImage](../../../README.md#freeimage), [NPP](../../../README.md#npp)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/freeImageInteropNPP/freeImageInteropNPP_vs2017.vcxproj b/Samples/4_CUDA_Libraries/freeImageInteropNPP/freeImageInteropNPP_vs2017.vcxproj
index e791cbe1..b81f5f26 100644
--- a/Samples/4_CUDA_Libraries/freeImageInteropNPP/freeImageInteropNPP_vs2017.vcxproj
+++ b/Samples/4_CUDA_Libraries/freeImageInteropNPP/freeImageInteropNPP_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -117,6 +117,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/freeImageInteropNPP/freeImageInteropNPP_vs2019.vcxproj b/Samples/4_CUDA_Libraries/freeImageInteropNPP/freeImageInteropNPP_vs2019.vcxproj
index d6b27ff1..03289595 100644
--- a/Samples/4_CUDA_Libraries/freeImageInteropNPP/freeImageInteropNPP_vs2019.vcxproj
+++ b/Samples/4_CUDA_Libraries/freeImageInteropNPP/freeImageInteropNPP_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -113,6 +113,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/freeImageInteropNPP/freeImageInteropNPP_vs2022.vcxproj b/Samples/4_CUDA_Libraries/freeImageInteropNPP/freeImageInteropNPP_vs2022.vcxproj
index 6b803978..624cfaa8 100644
--- a/Samples/4_CUDA_Libraries/freeImageInteropNPP/freeImageInteropNPP_vs2022.vcxproj
+++ b/Samples/4_CUDA_Libraries/freeImageInteropNPP/freeImageInteropNPP_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -113,6 +113,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/histEqualizationNPP/NsightEclipse.xml b/Samples/4_CUDA_Libraries/histEqualizationNPP/NsightEclipse.xml
index eadb5438..9b1554bb 100644
--- a/Samples/4_CUDA_Libraries/histEqualizationNPP/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/histEqualizationNPP/NsightEclipse.xml
@@ -3,14 +3,14 @@
 <entry>
   <name>histEqualizationNPP</name>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaGetDeviceCount</toolkit>
-    <toolkit>cudaDriverGetVersion</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaDeviceInit</toolkit>
-    <toolkit>cudaSetDevice</toolkit>
     <toolkit>cudaRuntimeGetVersion</toolkit>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaSetDevice</toolkit>
+    <toolkit>cudaGetDeviceCount</toolkit>
+    <toolkit>cudaDeviceInit</toolkit>
+    <toolkit>cudaDriverGetVersion</toolkit>
+    <toolkit>cudaMalloc</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[This CUDA Sample demonstrates how to use NPP for histogram equalization for image data.]]></description>
@@ -71,6 +71,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/4_CUDA_Libraries/histEqualizationNPP/README.md b/Samples/4_CUDA_Libraries/histEqualizationNPP/README.md
index dff01a26..ecf77bcb 100644
--- a/Samples/4_CUDA_Libraries/histEqualizationNPP/README.md
+++ b/Samples/4_CUDA_Libraries/histEqualizationNPP/README.md
@@ -10,7 +10,7 @@ Image Processing, Performance Strategies, NPP Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaGetDeviceCount, cudaDriverGetVersion, cudaMalloc, cudaDeviceInit, cudaSetDevice, cudaRuntimeGetVersion, cudaMemcpy, cudaGetDeviceProperties
+cudaRuntimeGetVersion, cudaMemcpy, cudaFree, cudaSetDevice, cudaGetDeviceCount, cudaDeviceInit, cudaDriverGetVersion, cudaMalloc, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [FreeImage](../../../README.md#freeimage), [NPP](../../../README.md#npp)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/histEqualizationNPP/histEqualizationNPP_vs2017.vcxproj b/Samples/4_CUDA_Libraries/histEqualizationNPP/histEqualizationNPP_vs2017.vcxproj
index a97516a6..6eb23e16 100644
--- a/Samples/4_CUDA_Libraries/histEqualizationNPP/histEqualizationNPP_vs2017.vcxproj
+++ b/Samples/4_CUDA_Libraries/histEqualizationNPP/histEqualizationNPP_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -117,6 +117,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/histEqualizationNPP/histEqualizationNPP_vs2019.vcxproj b/Samples/4_CUDA_Libraries/histEqualizationNPP/histEqualizationNPP_vs2019.vcxproj
index 2a220a88..6dd57051 100644
--- a/Samples/4_CUDA_Libraries/histEqualizationNPP/histEqualizationNPP_vs2019.vcxproj
+++ b/Samples/4_CUDA_Libraries/histEqualizationNPP/histEqualizationNPP_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -113,6 +113,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/histEqualizationNPP/histEqualizationNPP_vs2022.vcxproj b/Samples/4_CUDA_Libraries/histEqualizationNPP/histEqualizationNPP_vs2022.vcxproj
index b1780d1d..3f16252a 100644
--- a/Samples/4_CUDA_Libraries/histEqualizationNPP/histEqualizationNPP_vs2022.vcxproj
+++ b/Samples/4_CUDA_Libraries/histEqualizationNPP/histEqualizationNPP_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -113,6 +113,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/lineOfSight/Makefile b/Samples/4_CUDA_Libraries/lineOfSight/Makefile
index 55adba98..21b842fe 100644
--- a/Samples/4_CUDA_Libraries/lineOfSight/Makefile
+++ b/Samples/4_CUDA_Libraries/lineOfSight/Makefile
@@ -304,9 +304,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/4_CUDA_Libraries/lineOfSight/NsightEclipse.xml b/Samples/4_CUDA_Libraries/lineOfSight/NsightEclipse.xml
index c97e235a..e279a4fc 100644
--- a/Samples/4_CUDA_Libraries/lineOfSight/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/lineOfSight/NsightEclipse.xml
@@ -6,11 +6,11 @@
     <flag>--std=c++14</flag>
   </cflags>
   <cuda_api_list>
+    <toolkit>cudaCreateChannelDesc</toolkit>
+    <toolkit>cudaMallocArray</toolkit>
     <toolkit>cudaFreeArray</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaCreateTextureObject</toolkit>
-    <toolkit>cudaMallocArray</toolkit>
-    <toolkit>cudaCreateChannelDesc</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample is an implementation of a simple line-of-sight algorithm: Given a height map and a ray originating at some observation point, it computes all the points along the ray that are visible from the observation point. The implementation is based on the Thrust library.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -57,6 +57,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/4_CUDA_Libraries/lineOfSight/README.md b/Samples/4_CUDA_Libraries/lineOfSight/README.md
index cbd95d40..e704d3e2 100644
--- a/Samples/4_CUDA_Libraries/lineOfSight/README.md
+++ b/Samples/4_CUDA_Libraries/lineOfSight/README.md
@@ -10,7 +10,7 @@ Thrust Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFreeArray, cudaDeviceSynchronize, cudaCreateTextureObject, cudaMallocArray, cudaCreateChannelDesc
+cudaCreateChannelDesc, cudaMallocArray, cudaFreeArray, cudaDeviceSynchronize, cudaCreateTextureObject
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/4_CUDA_Libraries/lineOfSight/lineOfSight_vs2017.vcxproj b/Samples/4_CUDA_Libraries/lineOfSight/lineOfSight_vs2017.vcxproj
index 0e4b882b..14b93fad 100644
--- a/Samples/4_CUDA_Libraries/lineOfSight/lineOfSight_vs2017.vcxproj
+++ b/Samples/4_CUDA_Libraries/lineOfSight/lineOfSight_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/lineOfSight.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/lineOfSight/lineOfSight_vs2019.vcxproj b/Samples/4_CUDA_Libraries/lineOfSight/lineOfSight_vs2019.vcxproj
index 937d7a1b..e2dc1bf9 100644
--- a/Samples/4_CUDA_Libraries/lineOfSight/lineOfSight_vs2019.vcxproj
+++ b/Samples/4_CUDA_Libraries/lineOfSight/lineOfSight_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/lineOfSight.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/lineOfSight/lineOfSight_vs2022.vcxproj b/Samples/4_CUDA_Libraries/lineOfSight/lineOfSight_vs2022.vcxproj
index 41f70879..2b15511d 100644
--- a/Samples/4_CUDA_Libraries/lineOfSight/lineOfSight_vs2022.vcxproj
+++ b/Samples/4_CUDA_Libraries/lineOfSight/lineOfSight_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/lineOfSight.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/matrixMulCUBLAS/Makefile b/Samples/4_CUDA_Libraries/matrixMulCUBLAS/Makefile
index c5e76fb9..dc4bc8dd 100644
--- a/Samples/4_CUDA_Libraries/matrixMulCUBLAS/Makefile
+++ b/Samples/4_CUDA_Libraries/matrixMulCUBLAS/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/4_CUDA_Libraries/matrixMulCUBLAS/NsightEclipse.xml b/Samples/4_CUDA_Libraries/matrixMulCUBLAS/NsightEclipse.xml
index 6f7125d9..8c19d35a 100644
--- a/Samples/4_CUDA_Libraries/matrixMulCUBLAS/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/matrixMulCUBLAS/NsightEclipse.xml
@@ -3,14 +3,14 @@
 <entry>
   <name>matrixMulCUBLAS</name>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
-    <toolkit>cudaEventSynchronize</toolkit>
-    <toolkit>cudaMalloc</toolkit>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaEventSynchronize</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample implements matrix multiplication from Chapter 3 of the programming guide. To illustrate GPU performance for matrix multiply, this sample also shows how to use the new CUDA 4.0 interface for CUBLAS to demonstrate high-performance performance for matrix multiplication.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -57,6 +57,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <sources>
     <ignore>new/matrixMulCUBLAS.cpp</ignore>
   </sources>
diff --git a/Samples/4_CUDA_Libraries/matrixMulCUBLAS/README.md b/Samples/4_CUDA_Libraries/matrixMulCUBLAS/README.md
index 1390e9eb..a5d68353 100644
--- a/Samples/4_CUDA_Libraries/matrixMulCUBLAS/README.md
+++ b/Samples/4_CUDA_Libraries/matrixMulCUBLAS/README.md
@@ -10,7 +10,7 @@ CUDA Runtime API, Performance Strategies, Linear Algebra, CUBLAS
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaEventRecord, cudaEventCreate, cudaEventElapsedTime, cudaEventSynchronize, cudaMalloc, cudaMemcpy, cudaGetDeviceProperties
+cudaMemcpy, cudaFree, cudaEventSynchronize, cudaEventRecord, cudaMalloc, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate
 
 ## Dependencies needed to build/run
 [CUBLAS](../../../README.md#cublas)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/matrixMulCUBLAS/matrixMulCUBLAS_vs2017.vcxproj b/Samples/4_CUDA_Libraries/matrixMulCUBLAS/matrixMulCUBLAS_vs2017.vcxproj
index ccc83a66..7cf90b09 100644
--- a/Samples/4_CUDA_Libraries/matrixMulCUBLAS/matrixMulCUBLAS_vs2017.vcxproj
+++ b/Samples/4_CUDA_Libraries/matrixMulCUBLAS/matrixMulCUBLAS_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/matrixMulCUBLAS.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/matrixMulCUBLAS/matrixMulCUBLAS_vs2019.vcxproj b/Samples/4_CUDA_Libraries/matrixMulCUBLAS/matrixMulCUBLAS_vs2019.vcxproj
index 6f820ba1..1665d0fa 100644
--- a/Samples/4_CUDA_Libraries/matrixMulCUBLAS/matrixMulCUBLAS_vs2019.vcxproj
+++ b/Samples/4_CUDA_Libraries/matrixMulCUBLAS/matrixMulCUBLAS_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/matrixMulCUBLAS.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/matrixMulCUBLAS/matrixMulCUBLAS_vs2022.vcxproj b/Samples/4_CUDA_Libraries/matrixMulCUBLAS/matrixMulCUBLAS_vs2022.vcxproj
index 4b4d8722..e9257bfb 100644
--- a/Samples/4_CUDA_Libraries/matrixMulCUBLAS/matrixMulCUBLAS_vs2022.vcxproj
+++ b/Samples/4_CUDA_Libraries/matrixMulCUBLAS/matrixMulCUBLAS_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/matrixMulCUBLAS.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/nvJPEG/NsightEclipse.xml b/Samples/4_CUDA_Libraries/nvJPEG/NsightEclipse.xml
index 90114781..baba3f94 100644
--- a/Samples/4_CUDA_Libraries/nvJPEG/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/nvJPEG/NsightEclipse.xml
@@ -3,18 +3,18 @@
 <entry>
   <name>nvJPEG</name>
   <cuda_api_list>
+    <toolkit>cudaHostAlloc</toolkit>
+    <toolkit>cudaStreamCreateWithFlags</toolkit>
     <toolkit>cudaStreamDestroy</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
-    <toolkit>cudaHostAlloc</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
     <toolkit>cudaEventSynchronize</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
     <toolkit>cudaFreeHost</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaStreamCreateWithFlags</toolkit>
     <toolkit>cudaStreamSynchronize</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[A CUDA Sample that demonstrates single and batched decoding of jpeg images using NVJPEG Library.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -58,6 +58,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/4_CUDA_Libraries/nvJPEG/README.md b/Samples/4_CUDA_Libraries/nvJPEG/README.md
index e1d3f7fe..a54a46ca 100644
--- a/Samples/4_CUDA_Libraries/nvJPEG/README.md
+++ b/Samples/4_CUDA_Libraries/nvJPEG/README.md
@@ -10,7 +10,7 @@ Image Decoding, NVJPEG Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaStreamDestroy, cudaFree, cudaEventRecord, cudaHostAlloc, cudaEventCreate, cudaEventElapsedTime, cudaEventSynchronize, cudaFreeHost, cudaMalloc, cudaStreamCreateWithFlags, cudaStreamSynchronize, cudaGetDeviceProperties
+cudaHostAlloc, cudaStreamCreateWithFlags, cudaStreamDestroy, cudaFree, cudaEventSynchronize, cudaEventRecord, cudaFreeHost, cudaStreamSynchronize, cudaMalloc, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate
 
 ## Dependencies needed to build/run
 [NVJPEG](../../../README.md#nvjpeg)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/nvJPEG/nvJPEG_vs2017.vcxproj b/Samples/4_CUDA_Libraries/nvJPEG/nvJPEG_vs2017.vcxproj
index 5abe980b..c5931b66 100644
--- a/Samples/4_CUDA_Libraries/nvJPEG/nvJPEG_vs2017.vcxproj
+++ b/Samples/4_CUDA_Libraries/nvJPEG/nvJPEG_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/nvJPEG/nvJPEG_vs2019.vcxproj b/Samples/4_CUDA_Libraries/nvJPEG/nvJPEG_vs2019.vcxproj
index 1bae3470..05906aff 100644
--- a/Samples/4_CUDA_Libraries/nvJPEG/nvJPEG_vs2019.vcxproj
+++ b/Samples/4_CUDA_Libraries/nvJPEG/nvJPEG_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/nvJPEG/nvJPEG_vs2022.vcxproj b/Samples/4_CUDA_Libraries/nvJPEG/nvJPEG_vs2022.vcxproj
index 9b68e545..f861ff46 100644
--- a/Samples/4_CUDA_Libraries/nvJPEG/nvJPEG_vs2022.vcxproj
+++ b/Samples/4_CUDA_Libraries/nvJPEG/nvJPEG_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/nvJPEG_encoder/NsightEclipse.xml b/Samples/4_CUDA_Libraries/nvJPEG_encoder/NsightEclipse.xml
index 157b43cd..fa59430a 100644
--- a/Samples/4_CUDA_Libraries/nvJPEG_encoder/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/nvJPEG_encoder/NsightEclipse.xml
@@ -4,14 +4,14 @@
   <name>nvJPEG_encoder</name>
   <cuda_api_list>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaEventSynchronize</toolkit>
-    <toolkit>cudaMalloc</toolkit>
     <toolkit>cudaGetErrorString</toolkit>
+    <toolkit>cudaEventSynchronize</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[A CUDA Sample that demonstrates single encoding of jpeg images using NVJPEG Library.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -55,6 +55,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/4_CUDA_Libraries/nvJPEG_encoder/README.md b/Samples/4_CUDA_Libraries/nvJPEG_encoder/README.md
index d3170c4b..09e2227b 100644
--- a/Samples/4_CUDA_Libraries/nvJPEG_encoder/README.md
+++ b/Samples/4_CUDA_Libraries/nvJPEG_encoder/README.md
@@ -10,7 +10,7 @@ Image Encoding, NVJPEG Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaEventRecord, cudaEventCreate, cudaEventElapsedTime, cudaDeviceSynchronize, cudaEventSynchronize, cudaMalloc, cudaGetErrorString, cudaGetDeviceProperties
+cudaFree, cudaGetErrorString, cudaEventSynchronize, cudaDeviceSynchronize, cudaEventRecord, cudaMalloc, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate
 
 ## Dependencies needed to build/run
 [NVJPEG](../../../README.md#nvjpeg)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/nvJPEG_encoder/nvJPEG_encoder_vs2017.vcxproj b/Samples/4_CUDA_Libraries/nvJPEG_encoder/nvJPEG_encoder_vs2017.vcxproj
index bd7bdf90..22a09377 100644
--- a/Samples/4_CUDA_Libraries/nvJPEG_encoder/nvJPEG_encoder_vs2017.vcxproj
+++ b/Samples/4_CUDA_Libraries/nvJPEG_encoder/nvJPEG_encoder_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/nvJPEG_encoder/nvJPEG_encoder_vs2019.vcxproj b/Samples/4_CUDA_Libraries/nvJPEG_encoder/nvJPEG_encoder_vs2019.vcxproj
index bf27a1ed..4e23250e 100644
--- a/Samples/4_CUDA_Libraries/nvJPEG_encoder/nvJPEG_encoder_vs2019.vcxproj
+++ b/Samples/4_CUDA_Libraries/nvJPEG_encoder/nvJPEG_encoder_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/nvJPEG_encoder/nvJPEG_encoder_vs2022.vcxproj b/Samples/4_CUDA_Libraries/nvJPEG_encoder/nvJPEG_encoder_vs2022.vcxproj
index fcef7dc3..735c9892 100644
--- a/Samples/4_CUDA_Libraries/nvJPEG_encoder/nvJPEG_encoder_vs2022.vcxproj
+++ b/Samples/4_CUDA_Libraries/nvJPEG_encoder/nvJPEG_encoder_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/oceanFFT/Makefile b/Samples/4_CUDA_Libraries/oceanFFT/Makefile
index 333096a4..eaa57172 100644
--- a/Samples/4_CUDA_Libraries/oceanFFT/Makefile
+++ b/Samples/4_CUDA_Libraries/oceanFFT/Makefile
@@ -299,9 +299,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/4_CUDA_Libraries/oceanFFT/NsightEclipse.xml b/Samples/4_CUDA_Libraries/oceanFFT/NsightEclipse.xml
index 3c846efc..8ac28a1b 100644
--- a/Samples/4_CUDA_Libraries/oceanFFT/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/oceanFFT/NsightEclipse.xml
@@ -3,17 +3,17 @@
 <entry>
   <name>oceanFFT</name>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaGraphicsMapResources</toolkit>
-    <toolkit>cudaUpdateHeightmapKernel</toolkit>
-    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
-    <toolkit>cudaCalculateSlopeKernel</toolkit>
-    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaGenerateSpectrumKernel</toolkit>
-    <toolkit>cudaGraphicsUnregisterResource</toolkit>
     <toolkit>cudaGraphicsUnmapResources</toolkit>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
+    <toolkit>cudaCalculateSlopeKernel</toolkit>
+    <toolkit>cudaGraphicsMapResources</toolkit>
+    <toolkit>cudaUpdateHeightmapKernel</toolkit>
+    <toolkit>cudaGraphicsUnregisterResource</toolkit>
+    <toolkit>cudaGenerateSpectrumKernel</toolkit>
+    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample simulates an Ocean height field using CUFFT Library and renders the result using OpenGL.]]></description>
@@ -85,6 +85,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/4_CUDA_Libraries/oceanFFT/README.md b/Samples/4_CUDA_Libraries/oceanFFT/README.md
index e6f1b2a2..00f7aa57 100644
--- a/Samples/4_CUDA_Libraries/oceanFFT/README.md
+++ b/Samples/4_CUDA_Libraries/oceanFFT/README.md
@@ -10,7 +10,7 @@ Graphics Interop, Image Processing, CUFFT Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaGraphicsMapResources, cudaUpdateHeightmapKernel, cudaGraphicsGLRegisterBuffer, cudaCalculateSlopeKernel, cudaGraphicsResourceGetMappedPointer, cudaMalloc, cudaGenerateSpectrumKernel, cudaGraphicsUnregisterResource, cudaGraphicsUnmapResources, cudaMemcpy, cudaGetDeviceProperties
+cudaGraphicsUnmapResources, cudaMemcpy, cudaMalloc, cudaFree, cudaGraphicsResourceGetMappedPointer, cudaCalculateSlopeKernel, cudaGraphicsMapResources, cudaUpdateHeightmapKernel, cudaGraphicsUnregisterResource, cudaGenerateSpectrumKernel, cudaGraphicsGLRegisterBuffer, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [X11](../../../README.md#x11), [GL](../../../README.md#gl), [CUFFT](../../../README.md#cufft)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/oceanFFT/findgllib.mk b/Samples/4_CUDA_Libraries/oceanFFT/findgllib.mk
index f0a5c551..998fcf0f 100644
--- a/Samples/4_CUDA_Libraries/oceanFFT/findgllib.mk
+++ b/Samples/4_CUDA_Libraries/oceanFFT/findgllib.mk
@@ -53,11 +53,12 @@ endif
 ifeq ("$(TARGET_OS)","linux")
     # $(info) >> findgllib.mk -> LINUX path <<<)
     # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside
-    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu      >/dev/null 2>&1; echo $$?)
-    FEDORA = $(shell echo $(DISTRO) | grep -i fedora      >/dev/null 2>&1; echo $$?)
-    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
-    CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
+    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu       >/dev/null 2>&1; echo $$?)
+    FEDORA = $(shell echo $(DISTRO) | grep -i fedora       >/dev/null 2>&1; echo $$?)
+    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel'  >/dev/null 2>&1; echo $$?)
+    CENTOS = $(shell echo $(DISTRO) | grep -i centos       >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux")
         DFLT_PATH ?= /usr/lib
       endif
     endif
+
     ifeq ("$(SUSE)","0")
       GLPATH    ?= /usr/X11R6/lib64
       GLLINK    ?= -L/usr/X11R6/lib64
       DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(FEDORA)","0")
+    else 
       GLPATH    ?= /usr/lib64/nvidia
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
-    ifeq ("$(RHEL)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(CENTOS)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-  
+      
   # find libGL, libGLU 
   GLLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so  -print 2>/dev/null)
   GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null)
diff --git a/Samples/4_CUDA_Libraries/oceanFFT/oceanFFT_vs2017.vcxproj b/Samples/4_CUDA_Libraries/oceanFFT/oceanFFT_vs2017.vcxproj
index ff470ed3..09d8130b 100644
--- a/Samples/4_CUDA_Libraries/oceanFFT/oceanFFT_vs2017.vcxproj
+++ b/Samples/4_CUDA_Libraries/oceanFFT/oceanFFT_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/oceanFFT.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -118,6 +118,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/oceanFFT/oceanFFT_vs2019.vcxproj b/Samples/4_CUDA_Libraries/oceanFFT/oceanFFT_vs2019.vcxproj
index 2283b05f..84a21720 100644
--- a/Samples/4_CUDA_Libraries/oceanFFT/oceanFFT_vs2019.vcxproj
+++ b/Samples/4_CUDA_Libraries/oceanFFT/oceanFFT_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/oceanFFT.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -114,6 +114,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/oceanFFT/oceanFFT_vs2022.vcxproj b/Samples/4_CUDA_Libraries/oceanFFT/oceanFFT_vs2022.vcxproj
index 5e71f639..9a86a338 100644
--- a/Samples/4_CUDA_Libraries/oceanFFT/oceanFFT_vs2022.vcxproj
+++ b/Samples/4_CUDA_Libraries/oceanFFT/oceanFFT_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/oceanFFT.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -114,6 +114,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/randomFog/NsightEclipse.xml b/Samples/4_CUDA_Libraries/randomFog/NsightEclipse.xml
index 71960c72..467d2ef0 100644
--- a/Samples/4_CUDA_Libraries/randomFog/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/randomFog/NsightEclipse.xml
@@ -7,9 +7,9 @@
   </cflags>
   <cuda_api_list>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaGetErrorString</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaFree</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample illustrates pseudo- and quasi- random numbers produced by CURAND.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -76,6 +76,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/4_CUDA_Libraries/randomFog/README.md b/Samples/4_CUDA_Libraries/randomFog/README.md
index 9a477994..e101b5b6 100644
--- a/Samples/4_CUDA_Libraries/randomFog/README.md
+++ b/Samples/4_CUDA_Libraries/randomFog/README.md
@@ -10,7 +10,7 @@ This sample illustrates pseudo- and quasi- random numbers produced by CURAND.
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMalloc, cudaFree, cudaMemcpy, cudaGetErrorString
+cudaMalloc, cudaGetErrorString, cudaMemcpy, cudaFree
 
 ## Dependencies needed to build/run
 [X11](../../../README.md#x11), [GL](../../../README.md#gl), [CURAND](../../../README.md#curand)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/randomFog/findgllib.mk b/Samples/4_CUDA_Libraries/randomFog/findgllib.mk
index f0a5c551..998fcf0f 100644
--- a/Samples/4_CUDA_Libraries/randomFog/findgllib.mk
+++ b/Samples/4_CUDA_Libraries/randomFog/findgllib.mk
@@ -53,11 +53,12 @@ endif
 ifeq ("$(TARGET_OS)","linux")
     # $(info) >> findgllib.mk -> LINUX path <<<)
     # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside
-    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu      >/dev/null 2>&1; echo $$?)
-    FEDORA = $(shell echo $(DISTRO) | grep -i fedora      >/dev/null 2>&1; echo $$?)
-    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
-    CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
+    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu       >/dev/null 2>&1; echo $$?)
+    FEDORA = $(shell echo $(DISTRO) | grep -i fedora       >/dev/null 2>&1; echo $$?)
+    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel'  >/dev/null 2>&1; echo $$?)
+    CENTOS = $(shell echo $(DISTRO) | grep -i centos       >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux")
         DFLT_PATH ?= /usr/lib
       endif
     endif
+
     ifeq ("$(SUSE)","0")
       GLPATH    ?= /usr/X11R6/lib64
       GLLINK    ?= -L/usr/X11R6/lib64
       DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(FEDORA)","0")
+    else 
       GLPATH    ?= /usr/lib64/nvidia
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
-    ifeq ("$(RHEL)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(CENTOS)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-  
+      
   # find libGL, libGLU 
   GLLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so  -print 2>/dev/null)
   GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null)
diff --git a/Samples/4_CUDA_Libraries/randomFog/randomFog_vs2017.vcxproj b/Samples/4_CUDA_Libraries/randomFog/randomFog_vs2017.vcxproj
index 6c354dee..4e8773d6 100644
--- a/Samples/4_CUDA_Libraries/randomFog/randomFog_vs2017.vcxproj
+++ b/Samples/4_CUDA_Libraries/randomFog/randomFog_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -118,6 +118,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/randomFog/randomFog_vs2019.vcxproj b/Samples/4_CUDA_Libraries/randomFog/randomFog_vs2019.vcxproj
index ec4d468c..52ae3ec8 100644
--- a/Samples/4_CUDA_Libraries/randomFog/randomFog_vs2019.vcxproj
+++ b/Samples/4_CUDA_Libraries/randomFog/randomFog_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -114,6 +114,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/randomFog/randomFog_vs2022.vcxproj b/Samples/4_CUDA_Libraries/randomFog/randomFog_vs2022.vcxproj
index 680161ec..126ba72f 100644
--- a/Samples/4_CUDA_Libraries/randomFog/randomFog_vs2022.vcxproj
+++ b/Samples/4_CUDA_Libraries/randomFog/randomFog_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -114,6 +114,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/simpleCUBLAS/NsightEclipse.xml b/Samples/4_CUDA_Libraries/simpleCUBLAS/NsightEclipse.xml
index 14436606..47e06575 100644
--- a/Samples/4_CUDA_Libraries/simpleCUBLAS/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/simpleCUBLAS/NsightEclipse.xml
@@ -50,6 +50,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/4_CUDA_Libraries/simpleCUBLAS/README.md b/Samples/4_CUDA_Libraries/simpleCUBLAS/README.md
index d398137e..dbb814b9 100644
--- a/Samples/4_CUDA_Libraries/simpleCUBLAS/README.md
+++ b/Samples/4_CUDA_Libraries/simpleCUBLAS/README.md
@@ -10,7 +10,7 @@ Image Processing, CUBLAS Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -30,7 +30,7 @@ cudaMalloc, cudaFree
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/simpleCUBLAS/simpleCUBLAS_vs2017.vcxproj b/Samples/4_CUDA_Libraries/simpleCUBLAS/simpleCUBLAS_vs2017.vcxproj
index 569946e5..181e913b 100644
--- a/Samples/4_CUDA_Libraries/simpleCUBLAS/simpleCUBLAS_vs2017.vcxproj
+++ b/Samples/4_CUDA_Libraries/simpleCUBLAS/simpleCUBLAS_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/simpleCUBLAS/simpleCUBLAS_vs2019.vcxproj b/Samples/4_CUDA_Libraries/simpleCUBLAS/simpleCUBLAS_vs2019.vcxproj
index 1870c7cc..f0994fdf 100644
--- a/Samples/4_CUDA_Libraries/simpleCUBLAS/simpleCUBLAS_vs2019.vcxproj
+++ b/Samples/4_CUDA_Libraries/simpleCUBLAS/simpleCUBLAS_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/simpleCUBLAS/simpleCUBLAS_vs2022.vcxproj b/Samples/4_CUDA_Libraries/simpleCUBLAS/simpleCUBLAS_vs2022.vcxproj
index 21e6dcf2..9640014c 100644
--- a/Samples/4_CUDA_Libraries/simpleCUBLAS/simpleCUBLAS_vs2022.vcxproj
+++ b/Samples/4_CUDA_Libraries/simpleCUBLAS/simpleCUBLAS_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/simpleCUBLASXT/NsightEclipse.xml b/Samples/4_CUDA_Libraries/simpleCUBLASXT/NsightEclipse.xml
index 9aa98823..47bd44a5 100644
--- a/Samples/4_CUDA_Libraries/simpleCUBLASXT/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/simpleCUBLASXT/NsightEclipse.xml
@@ -3,9 +3,9 @@
 <entry>
   <name>simpleCUBLASXT</name>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaGetDeviceCount</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaGetDeviceCount</toolkit>
+    <toolkit>cudaFree</toolkit>
   </cuda_api_list>
   <description><![CDATA[Example of using CUBLAS-XT library which performs GEMM operations over Multiple GPUs.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -50,6 +50,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/4_CUDA_Libraries/simpleCUBLASXT/README.md b/Samples/4_CUDA_Libraries/simpleCUBLASXT/README.md
index 37d6d4e9..0d8c9695 100644
--- a/Samples/4_CUDA_Libraries/simpleCUBLASXT/README.md
+++ b/Samples/4_CUDA_Libraries/simpleCUBLASXT/README.md
@@ -10,7 +10,7 @@ CUBLAS-XT Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaGetDeviceCount, cudaGetDeviceProperties
+cudaGetDeviceProperties, cudaGetDeviceCount, cudaFree
 
 ## Dependencies needed to build/run
 [CUBLAS](../../../README.md#cublas)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/simpleCUBLASXT/simpleCUBLASXT_vs2017.vcxproj b/Samples/4_CUDA_Libraries/simpleCUBLASXT/simpleCUBLASXT_vs2017.vcxproj
index eec501a7..29702658 100644
--- a/Samples/4_CUDA_Libraries/simpleCUBLASXT/simpleCUBLASXT_vs2017.vcxproj
+++ b/Samples/4_CUDA_Libraries/simpleCUBLASXT/simpleCUBLASXT_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/simpleCUBLASXT/simpleCUBLASXT_vs2019.vcxproj b/Samples/4_CUDA_Libraries/simpleCUBLASXT/simpleCUBLASXT_vs2019.vcxproj
index 06446054..32a4ace1 100644
--- a/Samples/4_CUDA_Libraries/simpleCUBLASXT/simpleCUBLASXT_vs2019.vcxproj
+++ b/Samples/4_CUDA_Libraries/simpleCUBLASXT/simpleCUBLASXT_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/simpleCUBLASXT/simpleCUBLASXT_vs2022.vcxproj b/Samples/4_CUDA_Libraries/simpleCUBLASXT/simpleCUBLASXT_vs2022.vcxproj
index 114ef777..c900da38 100644
--- a/Samples/4_CUDA_Libraries/simpleCUBLASXT/simpleCUBLASXT_vs2022.vcxproj
+++ b/Samples/4_CUDA_Libraries/simpleCUBLASXT/simpleCUBLASXT_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/Makefile b/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/Makefile
index 2a75e025..86638c5e 100644
--- a/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/Makefile
+++ b/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/Makefile
@@ -291,9 +291,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/NsightEclipse.xml b/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/NsightEclipse.xml
index 2b64842d..3dcea4ef 100644
--- a/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/NsightEclipse.xml
@@ -4,9 +4,9 @@
   <name>simpleCUBLAS_LU</name>
   <cuda_api_list>
     <toolkit>cudaGetErrorEnum</toolkit>
-    <toolkit>cudaFree</toolkit>
     <toolkit>cudaMalloc</toolkit>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaFree</toolkit>
   </cuda_api_list>
   <description><![CDATA[CUDA sample demonstrating cuBLAS API cublasDgetrfBatched() for lower-upper (LU) decomposition of a matrix.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -52,6 +52,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/README.md b/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/README.md
index 94bdb4e8..2b1b93dc 100644
--- a/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/README.md
+++ b/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/README.md
@@ -10,7 +10,7 @@ CUBLAS Library, LU decomposition
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaGetErrorEnum, cudaFree, cudaMalloc, cudaMemcpy
+cudaGetErrorEnum, cudaMalloc, cudaMemcpy, cudaFree
 
 ## Dependencies needed to build/run
 [CUBLAS](../../../README.md#cublas)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/simpleCUBLAS_LU_vs2017.vcxproj b/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/simpleCUBLAS_LU_vs2017.vcxproj
index 9f13e4e8..bffe80d5 100644
--- a/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/simpleCUBLAS_LU_vs2017.vcxproj
+++ b/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/simpleCUBLAS_LU_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleCUBLAS_LU.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/simpleCUBLAS_LU_vs2019.vcxproj b/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/simpleCUBLAS_LU_vs2019.vcxproj
index 9c1278ca..18c1b117 100644
--- a/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/simpleCUBLAS_LU_vs2019.vcxproj
+++ b/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/simpleCUBLAS_LU_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleCUBLAS_LU.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/simpleCUBLAS_LU_vs2022.vcxproj b/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/simpleCUBLAS_LU_vs2022.vcxproj
index 4e775803..2ff51879 100644
--- a/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/simpleCUBLAS_LU_vs2022.vcxproj
+++ b/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/simpleCUBLAS_LU_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleCUBLAS_LU.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT/Makefile b/Samples/4_CUDA_Libraries/simpleCUFFT/Makefile
index 3c37107d..080c25d2 100644
--- a/Samples/4_CUDA_Libraries/simpleCUFFT/Makefile
+++ b/Samples/4_CUDA_Libraries/simpleCUFFT/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT/NsightEclipse.xml b/Samples/4_CUDA_Libraries/simpleCUFFT/NsightEclipse.xml
index cd63fbb5..6ba60d80 100644
--- a/Samples/4_CUDA_Libraries/simpleCUFFT/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/simpleCUFFT/NsightEclipse.xml
@@ -4,8 +4,8 @@
   <name>simpleCUFFT</name>
   <cuda_api_list>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaFree</toolkit>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaFree</toolkit>
   </cuda_api_list>
   <description><![CDATA[Example of using CUFFT. In this example, CUFFT is used to compute the 1D-convolution of some signal with some filter by transforming both into frequency domain, multiplying them together, and transforming the signal back to time domain. cuFFT plans are created using simple and advanced API functions.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -49,6 +49,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT/README.md b/Samples/4_CUDA_Libraries/simpleCUFFT/README.md
index 1d8cb404..e91252be 100644
--- a/Samples/4_CUDA_Libraries/simpleCUFFT/README.md
+++ b/Samples/4_CUDA_Libraries/simpleCUFFT/README.md
@@ -10,7 +10,7 @@ Image Processing, CUFFT Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMalloc, cudaFree, cudaMemcpy
+cudaMalloc, cudaMemcpy, cudaFree
 
 ## Dependencies needed to build/run
 [CUFFT](../../../README.md#cufft)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT/simpleCUFFT_vs2017.vcxproj b/Samples/4_CUDA_Libraries/simpleCUFFT/simpleCUFFT_vs2017.vcxproj
index dadcc1f8..a6e80d87 100644
--- a/Samples/4_CUDA_Libraries/simpleCUFFT/simpleCUFFT_vs2017.vcxproj
+++ b/Samples/4_CUDA_Libraries/simpleCUFFT/simpleCUFFT_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleCUFFT.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT/simpleCUFFT_vs2019.vcxproj b/Samples/4_CUDA_Libraries/simpleCUFFT/simpleCUFFT_vs2019.vcxproj
index 4920b00d..5eb64892 100644
--- a/Samples/4_CUDA_Libraries/simpleCUFFT/simpleCUFFT_vs2019.vcxproj
+++ b/Samples/4_CUDA_Libraries/simpleCUFFT/simpleCUFFT_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleCUFFT.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT/simpleCUFFT_vs2022.vcxproj b/Samples/4_CUDA_Libraries/simpleCUFFT/simpleCUFFT_vs2022.vcxproj
index 0b3dd99a..2c598379 100644
--- a/Samples/4_CUDA_Libraries/simpleCUFFT/simpleCUFFT_vs2022.vcxproj
+++ b/Samples/4_CUDA_Libraries/simpleCUFFT/simpleCUFFT_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleCUFFT.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/Makefile b/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/Makefile
index 610b3cc9..c21a0c60 100644
--- a/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/Makefile
+++ b/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/NsightEclipse.xml b/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/NsightEclipse.xml
index 55dddd19..a22e53a0 100644
--- a/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/NsightEclipse.xml
@@ -6,13 +6,13 @@
     <preprocessor>_USE_MATH_DEFINES</preprocessor>
   </additional_preprocessor>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
     <toolkit>cudaXtFree</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaSetDevice</toolkit>
     <toolkit>cudaGetDeviceCount</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaSetDevice</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[Example of using CUFFT. In this example, CUFFT is used to compute the 2D-convolution of some signal with some filter by transforming both into frequency domain, multiplying them together, and transforming the signal back to time domain on Multiple GPU.]]></description>
@@ -57,6 +57,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/README.md b/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/README.md
index 0c941fbb..9cd1ad57 100644
--- a/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/README.md
+++ b/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/README.md
@@ -10,7 +10,7 @@ Image Processing, CUFFT Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaXtFree, cudaGetDeviceCount, cudaDeviceSynchronize, cudaMalloc, cudaSetDevice, cudaMemcpy, cudaGetDeviceProperties
+cudaXtFree, cudaMemcpy, cudaFree, cudaSetDevice, cudaGetDeviceCount, cudaDeviceSynchronize, cudaMalloc, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [CUFFT](../../../README.md#cufft)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/simpleCUFFT_2d_MGPU_vs2017.vcxproj b/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/simpleCUFFT_2d_MGPU_vs2017.vcxproj
index 219ca46b..836ea063 100644
--- a/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/simpleCUFFT_2d_MGPU_vs2017.vcxproj
+++ b/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/simpleCUFFT_2d_MGPU_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleCUFFT_2d_MGPU.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/simpleCUFFT_2d_MGPU_vs2019.vcxproj b/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/simpleCUFFT_2d_MGPU_vs2019.vcxproj
index d8d5f360..86780b70 100644
--- a/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/simpleCUFFT_2d_MGPU_vs2019.vcxproj
+++ b/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/simpleCUFFT_2d_MGPU_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleCUFFT_2d_MGPU.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/simpleCUFFT_2d_MGPU_vs2022.vcxproj b/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/simpleCUFFT_2d_MGPU_vs2022.vcxproj
index 44c8dc9a..6e6b95b9 100644
--- a/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/simpleCUFFT_2d_MGPU_vs2022.vcxproj
+++ b/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/simpleCUFFT_2d_MGPU_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleCUFFT_2d_MGPU.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/Makefile b/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/Makefile
index 91eef96c..94cb18d7 100644
--- a/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/Makefile
+++ b/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/NsightEclipse.xml b/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/NsightEclipse.xml
index 5af2b802..f7274f8a 100644
--- a/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/NsightEclipse.xml
@@ -3,10 +3,10 @@
 <entry>
   <name>simpleCUFFT_MGPU</name>
   <cuda_api_list>
+    <toolkit>cudaXtFree</toolkit>
+    <toolkit>cudaSetDevice</toolkit>
     <toolkit>cudaGetDeviceCount</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaSetDevice</toolkit>
-    <toolkit>cudaXtFree</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[Example of using CUFFT. In this example, CUFFT is used to compute the 1D-convolution of some signal with some filter by transforming both into frequency domain, multiplying them together, and transforming the signal back to time domain on Multiple GPU.]]></description>
@@ -51,6 +51,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/README.md b/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/README.md
index 9d894764..bfb6e031 100644
--- a/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/README.md
+++ b/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/README.md
@@ -10,7 +10,7 @@ Image Processing, CUFFT Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaGetDeviceCount, cudaDeviceSynchronize, cudaSetDevice, cudaXtFree, cudaGetDeviceProperties
+cudaXtFree, cudaSetDevice, cudaGetDeviceCount, cudaDeviceSynchronize, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [CUFFT](../../../README.md#cufft)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/simpleCUFFT_MGPU_vs2017.vcxproj b/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/simpleCUFFT_MGPU_vs2017.vcxproj
index 8b222ff9..c9da7911 100644
--- a/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/simpleCUFFT_MGPU_vs2017.vcxproj
+++ b/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/simpleCUFFT_MGPU_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleCUFFT_MGPU.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/simpleCUFFT_MGPU_vs2019.vcxproj b/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/simpleCUFFT_MGPU_vs2019.vcxproj
index 06f27404..694fc4fd 100644
--- a/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/simpleCUFFT_MGPU_vs2019.vcxproj
+++ b/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/simpleCUFFT_MGPU_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleCUFFT_MGPU.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/simpleCUFFT_MGPU_vs2022.vcxproj b/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/simpleCUFFT_MGPU_vs2022.vcxproj
index 8a2ca6a0..a2f9ad9c 100644
--- a/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/simpleCUFFT_MGPU_vs2022.vcxproj
+++ b/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/simpleCUFFT_MGPU_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleCUFFT_MGPU.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_callback/Makefile b/Samples/4_CUDA_Libraries/simpleCUFFT_callback/Makefile
index b9577d0d..c5159bed 100644
--- a/Samples/4_CUDA_Libraries/simpleCUFFT_callback/Makefile
+++ b/Samples/4_CUDA_Libraries/simpleCUFFT_callback/Makefile
@@ -316,9 +316,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 70 72 75 80 86 87
+SMS ?= 53 70 72 75 80 86 87 90
 else
-SMS ?= 35 50 60 70 75 80 86
+SMS ?= 35 50 60 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_callback/NsightEclipse.xml b/Samples/4_CUDA_Libraries/simpleCUFFT_callback/NsightEclipse.xml
index ca2c23e6..eaa551ee 100644
--- a/Samples/4_CUDA_Libraries/simpleCUFFT_callback/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/simpleCUFFT_callback/NsightEclipse.xml
@@ -7,12 +7,12 @@
     <flag>-std=c++11</flag>
   </cflags>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaMalloc</toolkit>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaFree</toolkit>
     <toolkit>cudaMemcpyFromSymbol</toolkit>
-    <toolkit>cudaGetDeviceProperties</toolkit>
     <toolkit>cudaGetDevice</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[Example of using CUFFT. In this example, CUFFT is used to compute the 1D-convolution of some signal with some filter by transforming both into frequency domain, multiplying them together, and transforming the signal back to time domain. The difference between this example and the Simple CUFFT example is that the multiplication step is done by the CUFFT kernel with a user-supplied CUFFT callback routine, rather than by a separate kernel call.]]></description>
   <devicecompilation>separate</devicecompilation>
@@ -60,6 +60,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_callback/README.md b/Samples/4_CUDA_Libraries/simpleCUFFT_callback/README.md
index 29938002..71cd8ad1 100644
--- a/Samples/4_CUDA_Libraries/simpleCUFFT_callback/README.md
+++ b/Samples/4_CUDA_Libraries/simpleCUFFT_callback/README.md
@@ -10,7 +10,7 @@ Image Processing, CUFFT Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaMalloc, cudaMemcpy, cudaMemcpyFromSymbol, cudaGetDeviceProperties, cudaGetDevice
+cudaMemcpy, cudaFree, cudaMemcpyFromSymbol, cudaGetDevice, cudaMalloc, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [callback](../../../README.md#callback), [CUFFT](../../../README.md#cufft)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/watershedSegmentationNPP/NsightEclipse.xml b/Samples/4_CUDA_Libraries/watershedSegmentationNPP/NsightEclipse.xml
index 3d351a11..b16c392f 100644
--- a/Samples/4_CUDA_Libraries/watershedSegmentationNPP/NsightEclipse.xml
+++ b/Samples/4_CUDA_Libraries/watershedSegmentationNPP/NsightEclipse.xml
@@ -3,14 +3,14 @@
 <entry>
   <name>watershedSegmentationNPP</name>
   <cuda_api_list>
+    <toolkit>cudaRuntimeGetVersion</toolkit>
     <toolkit>cudaFree</toolkit>
     <toolkit>cudaDeviceGetAttribute</toolkit>
     <toolkit>cudaDriverGetVersion</toolkit>
+    <toolkit>cudaGetDevice</toolkit>
     <toolkit>cudaStreamGetFlags</toolkit>
     <toolkit>cudaStreamSynchronize</toolkit>
-    <toolkit>cudaRuntimeGetVersion</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
-    <toolkit>cudaGetDevice</toolkit>
   </cuda_api_list>
   <description><![CDATA[An NPP CUDA Sample that demonstrates how to use the NPP watershed segmentation function.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -62,6 +62,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/4_CUDA_Libraries/watershedSegmentationNPP/README.md b/Samples/4_CUDA_Libraries/watershedSegmentationNPP/README.md
index c06333e2..540e7443 100644
--- a/Samples/4_CUDA_Libraries/watershedSegmentationNPP/README.md
+++ b/Samples/4_CUDA_Libraries/watershedSegmentationNPP/README.md
@@ -10,7 +10,7 @@ Performance Strategies, Image Processing, NPP Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaDeviceGetAttribute, cudaDriverGetVersion, cudaStreamGetFlags, cudaStreamSynchronize, cudaRuntimeGetVersion, cudaGetDeviceProperties, cudaGetDevice
+cudaRuntimeGetVersion, cudaFree, cudaDeviceGetAttribute, cudaDriverGetVersion, cudaGetDevice, cudaStreamGetFlags, cudaStreamSynchronize, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [NPP](../../../README.md#npp)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/4_CUDA_Libraries/watershedSegmentationNPP/watershedSegmentationNPP_vs2017.vcxproj b/Samples/4_CUDA_Libraries/watershedSegmentationNPP/watershedSegmentationNPP_vs2017.vcxproj
index 6df4766b..c752f135 100644
--- a/Samples/4_CUDA_Libraries/watershedSegmentationNPP/watershedSegmentationNPP_vs2017.vcxproj
+++ b/Samples/4_CUDA_Libraries/watershedSegmentationNPP/watershedSegmentationNPP_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/watershedSegmentationNPP/watershedSegmentationNPP_vs2019.vcxproj b/Samples/4_CUDA_Libraries/watershedSegmentationNPP/watershedSegmentationNPP_vs2019.vcxproj
index 494b75bc..10f8fef8 100644
--- a/Samples/4_CUDA_Libraries/watershedSegmentationNPP/watershedSegmentationNPP_vs2019.vcxproj
+++ b/Samples/4_CUDA_Libraries/watershedSegmentationNPP/watershedSegmentationNPP_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/4_CUDA_Libraries/watershedSegmentationNPP/watershedSegmentationNPP_vs2022.vcxproj b/Samples/4_CUDA_Libraries/watershedSegmentationNPP/watershedSegmentationNPP_vs2022.vcxproj
index e4e32e49..d960f01c 100644
--- a/Samples/4_CUDA_Libraries/watershedSegmentationNPP/watershedSegmentationNPP_vs2022.vcxproj
+++ b/Samples/4_CUDA_Libraries/watershedSegmentationNPP/watershedSegmentationNPP_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/BlackScholes/BlackScholes_vs2017.vcxproj b/Samples/5_Domain_Specific/BlackScholes/BlackScholes_vs2017.vcxproj
index 625c2f57..c63b2d70 100644
--- a/Samples/5_Domain_Specific/BlackScholes/BlackScholes_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/BlackScholes/BlackScholes_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/BlackScholes.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/BlackScholes/BlackScholes_vs2019.vcxproj b/Samples/5_Domain_Specific/BlackScholes/BlackScholes_vs2019.vcxproj
index d872e998..adf0db2d 100644
--- a/Samples/5_Domain_Specific/BlackScholes/BlackScholes_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/BlackScholes/BlackScholes_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/BlackScholes.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/BlackScholes/BlackScholes_vs2022.vcxproj b/Samples/5_Domain_Specific/BlackScholes/BlackScholes_vs2022.vcxproj
index 69831822..aa30adea 100644
--- a/Samples/5_Domain_Specific/BlackScholes/BlackScholes_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/BlackScholes/BlackScholes_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/BlackScholes.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/BlackScholes/Makefile b/Samples/5_Domain_Specific/BlackScholes/Makefile
index b7d3c529..7935e540 100644
--- a/Samples/5_Domain_Specific/BlackScholes/Makefile
+++ b/Samples/5_Domain_Specific/BlackScholes/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/BlackScholes/NsightEclipse.xml b/Samples/5_Domain_Specific/BlackScholes/NsightEclipse.xml
index ab1f601d..8af9aa6d 100644
--- a/Samples/5_Domain_Specific/BlackScholes/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/BlackScholes/NsightEclipse.xml
@@ -7,9 +7,9 @@
   </cflags>
   <cuda_api_list>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaFree</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaFree</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample evaluates fair call and put prices for a given set of European options by Black-Scholes formula.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -50,6 +50,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/5_Domain_Specific/BlackScholes/README.md b/Samples/5_Domain_Specific/BlackScholes/README.md
index 858b17e6..2d535490 100644
--- a/Samples/5_Domain_Specific/BlackScholes/README.md
+++ b/Samples/5_Domain_Specific/BlackScholes/README.md
@@ -10,7 +10,7 @@ Computational Finance
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMalloc, cudaFree, cudaDeviceSynchronize, cudaMemcpy
+cudaMalloc, cudaDeviceSynchronize, cudaMemcpy, cudaFree
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/5_Domain_Specific/BlackScholes_nvrtc/BlackScholes_nvrtc_vs2017.vcxproj b/Samples/5_Domain_Specific/BlackScholes_nvrtc/BlackScholes_nvrtc_vs2017.vcxproj
index 01556903..0432f89e 100644
--- a/Samples/5_Domain_Specific/BlackScholes_nvrtc/BlackScholes_nvrtc_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/BlackScholes_nvrtc/BlackScholes_nvrtc_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/BlackScholes_nvrtc/BlackScholes_nvrtc_vs2019.vcxproj b/Samples/5_Domain_Specific/BlackScholes_nvrtc/BlackScholes_nvrtc_vs2019.vcxproj
index 309579cc..c97e0a3c 100644
--- a/Samples/5_Domain_Specific/BlackScholes_nvrtc/BlackScholes_nvrtc_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/BlackScholes_nvrtc/BlackScholes_nvrtc_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/BlackScholes_nvrtc/BlackScholes_nvrtc_vs2022.vcxproj b/Samples/5_Domain_Specific/BlackScholes_nvrtc/BlackScholes_nvrtc_vs2022.vcxproj
index d65aea1d..3796da00 100644
--- a/Samples/5_Domain_Specific/BlackScholes_nvrtc/BlackScholes_nvrtc_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/BlackScholes_nvrtc/BlackScholes_nvrtc_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/BlackScholes_nvrtc/README.md b/Samples/5_Domain_Specific/BlackScholes_nvrtc/README.md
index b0f95581..a0e4aa67 100644
--- a/Samples/5_Domain_Specific/BlackScholes_nvrtc/README.md
+++ b/Samples/5_Domain_Specific/BlackScholes_nvrtc/README.md
@@ -10,7 +10,7 @@ Computational Finance, Runtime Compilation
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,7 +23,7 @@ x86_64, ppc64le, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
-cuModuleGetFunction, cuMemAlloc, cuLaunchKernel, cuCtxSynchronize, cuMemFree, cuMemcpyDtoH, cuMemcpyHtoD
+cuMemcpyDtoH, cuLaunchKernel, cuMemcpyHtoD, cuCtxSynchronize, cuMemAlloc, cuMemFree, cuModuleGetFunction
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
 cudaBlockSize, cudaGridSize
@@ -33,7 +33,7 @@ cudaBlockSize, cudaGridSize
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/FDTD3d/FDTD3d_vs2017.vcxproj b/Samples/5_Domain_Specific/FDTD3d/FDTD3d_vs2017.vcxproj
index 1227bc24..09368fc9 100644
--- a/Samples/5_Domain_Specific/FDTD3d/FDTD3d_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/FDTD3d/FDTD3d_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/FDTD3d.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -112,6 +112,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/FDTD3d/FDTD3d_vs2019.vcxproj b/Samples/5_Domain_Specific/FDTD3d/FDTD3d_vs2019.vcxproj
index 10f2dc23..09c34005 100644
--- a/Samples/5_Domain_Specific/FDTD3d/FDTD3d_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/FDTD3d/FDTD3d_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/FDTD3d.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/FDTD3d/FDTD3d_vs2022.vcxproj b/Samples/5_Domain_Specific/FDTD3d/FDTD3d_vs2022.vcxproj
index 8f01460a..6af53b8b 100644
--- a/Samples/5_Domain_Specific/FDTD3d/FDTD3d_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/FDTD3d/FDTD3d_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/FDTD3d.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/FDTD3d/Makefile b/Samples/5_Domain_Specific/FDTD3d/Makefile
index 9f41e1d1..bbbee3e3 100644
--- a/Samples/5_Domain_Specific/FDTD3d/Makefile
+++ b/Samples/5_Domain_Specific/FDTD3d/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/FDTD3d/NsightEclipse.xml b/Samples/5_Domain_Specific/FDTD3d/NsightEclipse.xml
index 68eaef75..7528550b 100644
--- a/Samples/5_Domain_Specific/FDTD3d/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/FDTD3d/NsightEclipse.xml
@@ -3,19 +3,19 @@
 <entry>
   <name>FDTD3d</name>
   <cuda_api_list>
-    <toolkit>cudaMemcpyToSymbol</toolkit>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaFuncGetAttributes</toolkit>
-    <toolkit>cudaGetDeviceCount</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaEventDestroy</toolkit>
-    <toolkit>cudaSetDevice</toolkit>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaFuncGetAttributes</toolkit>
+    <toolkit>cudaSetDevice</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
+    <toolkit>cudaMemcpyToSymbol</toolkit>
+    <toolkit>cudaEventDestroy</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
+    <toolkit>cudaGetDeviceCount</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample applies a finite differences time domain progression stencil on a 3D surface.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -60,6 +60,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/5_Domain_Specific/FDTD3d/README.md b/Samples/5_Domain_Specific/FDTD3d/README.md
index c884443d..80774398 100644
--- a/Samples/5_Domain_Specific/FDTD3d/README.md
+++ b/Samples/5_Domain_Specific/FDTD3d/README.md
@@ -10,7 +10,7 @@ Performance Strategies
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemcpyToSymbol, cudaFree, cudaEventRecord, cudaEventCreate, cudaFuncGetAttributes, cudaGetDeviceCount, cudaEventElapsedTime, cudaDeviceSynchronize, cudaMalloc, cudaEventDestroy, cudaSetDevice, cudaMemcpy, cudaGetDeviceProperties
+cudaMemcpy, cudaMalloc, cudaFree, cudaFuncGetAttributes, cudaSetDevice, cudaGetDeviceProperties, cudaDeviceSynchronize, cudaEventRecord, cudaMemcpyToSymbol, cudaEventDestroy, cudaEventElapsedTime, cudaGetDeviceCount, cudaEventCreate
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/5_Domain_Specific/HSOpticalFlow/FlowCPU.flo b/Samples/5_Domain_Specific/HSOpticalFlow/FlowCPU.flo
new file mode 100644
index 00000000..d92d4549
Binary files /dev/null and b/Samples/5_Domain_Specific/HSOpticalFlow/FlowCPU.flo differ
diff --git a/Samples/5_Domain_Specific/HSOpticalFlow/FlowGPU.flo b/Samples/5_Domain_Specific/HSOpticalFlow/FlowGPU.flo
new file mode 100644
index 00000000..45e8df6f
Binary files /dev/null and b/Samples/5_Domain_Specific/HSOpticalFlow/FlowGPU.flo differ
diff --git a/Samples/5_Domain_Specific/HSOpticalFlow/HSOpticalFlow_vs2017.vcxproj b/Samples/5_Domain_Specific/HSOpticalFlow/HSOpticalFlow_vs2017.vcxproj
index 26fe77dc..28f83ead 100644
--- a/Samples/5_Domain_Specific/HSOpticalFlow/HSOpticalFlow_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/HSOpticalFlow/HSOpticalFlow_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/HSOpticalFlow.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -117,6 +117,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/HSOpticalFlow/HSOpticalFlow_vs2019.vcxproj b/Samples/5_Domain_Specific/HSOpticalFlow/HSOpticalFlow_vs2019.vcxproj
index 563a57be..f9a6290c 100644
--- a/Samples/5_Domain_Specific/HSOpticalFlow/HSOpticalFlow_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/HSOpticalFlow/HSOpticalFlow_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/HSOpticalFlow.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -113,6 +113,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/HSOpticalFlow/HSOpticalFlow_vs2022.vcxproj b/Samples/5_Domain_Specific/HSOpticalFlow/HSOpticalFlow_vs2022.vcxproj
index 241a3653..fe372f4e 100644
--- a/Samples/5_Domain_Specific/HSOpticalFlow/HSOpticalFlow_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/HSOpticalFlow/HSOpticalFlow_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/HSOpticalFlow.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -113,6 +113,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/HSOpticalFlow/Makefile b/Samples/5_Domain_Specific/HSOpticalFlow/Makefile
index f6f1a087..98ce7ac6 100644
--- a/Samples/5_Domain_Specific/HSOpticalFlow/Makefile
+++ b/Samples/5_Domain_Specific/HSOpticalFlow/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/HSOpticalFlow/NsightEclipse.xml b/Samples/5_Domain_Specific/HSOpticalFlow/NsightEclipse.xml
index 3de542e3..548a90c3 100644
--- a/Samples/5_Domain_Specific/HSOpticalFlow/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/HSOpticalFlow/NsightEclipse.xml
@@ -4,9 +4,9 @@
   <name>HSOpticalFlow</name>
   <cuda_api_list>
     <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaMemset</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
   </cuda_api_list>
   <description><![CDATA[Variational optical flow estimation example.  Uses textures for image operations. Shows how simple PDE solver can be accelerated with CUDA.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -52,6 +52,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/5_Domain_Specific/HSOpticalFlow/README.md b/Samples/5_Domain_Specific/HSOpticalFlow/README.md
index 363d7f17..195c8954 100644
--- a/Samples/5_Domain_Specific/HSOpticalFlow/README.md
+++ b/Samples/5_Domain_Specific/HSOpticalFlow/README.md
@@ -10,7 +10,7 @@ Image Processing, Data Parallel Algorithms
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMalloc, cudaMemset, cudaFree, cudaMemcpy
+cudaMalloc, cudaMemcpy, cudaMemset, cudaFree
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/5_Domain_Specific/Mandelbrot/Makefile b/Samples/5_Domain_Specific/Mandelbrot/Makefile
index 3daf6eea..777aa73c 100644
--- a/Samples/5_Domain_Specific/Mandelbrot/Makefile
+++ b/Samples/5_Domain_Specific/Mandelbrot/Makefile
@@ -299,9 +299,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/Mandelbrot/Mandelbrot_vs2017.vcxproj b/Samples/5_Domain_Specific/Mandelbrot/Mandelbrot_vs2017.vcxproj
index ef345fe8..4d840508 100644
--- a/Samples/5_Domain_Specific/Mandelbrot/Mandelbrot_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/Mandelbrot/Mandelbrot_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/Mandelbrot.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -121,6 +121,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/Mandelbrot/Mandelbrot_vs2019.vcxproj b/Samples/5_Domain_Specific/Mandelbrot/Mandelbrot_vs2019.vcxproj
index 64598393..f5774588 100644
--- a/Samples/5_Domain_Specific/Mandelbrot/Mandelbrot_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/Mandelbrot/Mandelbrot_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/Mandelbrot.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -117,6 +117,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/Mandelbrot/Mandelbrot_vs2022.vcxproj b/Samples/5_Domain_Specific/Mandelbrot/Mandelbrot_vs2022.vcxproj
index 786f8178..ce2b9586 100644
--- a/Samples/5_Domain_Specific/Mandelbrot/Mandelbrot_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/Mandelbrot/Mandelbrot_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/Mandelbrot.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -117,6 +117,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/Mandelbrot/NsightEclipse.xml b/Samples/5_Domain_Specific/Mandelbrot/NsightEclipse.xml
index 00d22c18..d74f3c4f 100644
--- a/Samples/5_Domain_Specific/Mandelbrot/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/Mandelbrot/NsightEclipse.xml
@@ -3,19 +3,19 @@
 <entry>
   <name>Mandelbrot</name>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaGraphicsMapResources</toolkit>
-    <toolkit>cudaGraphicsUnregisterResource</toolkit>
-    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
-    <toolkit>cudaGLUnregisterBufferObject</toolkit>
-    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
     <toolkit>cudaGLUnmapBufferObject</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaGLMapBufferObject</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaGLRegisterBufferObject</toolkit>
     <toolkit>cudaGraphicsUnmapResources</toolkit>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaGLMapBufferObject</toolkit>
+    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
+    <toolkit>cudaGraphicsMapResources</toolkit>
+    <toolkit>cudaGLUnregisterBufferObject</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaGLRegisterBufferObject</toolkit>
+    <toolkit>cudaGraphicsUnregisterResource</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample uses CUDA to compute and display the Mandelbrot or Julia sets interactively. It also illustrates the use of "double single" arithmetic to improve precision when zooming a long way into the pattern. This sample uses double precision.  Thanks to Mark Granger of NewTek who submitted this code sample.!]]></description>
@@ -79,6 +79,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/5_Domain_Specific/Mandelbrot/README.md b/Samples/5_Domain_Specific/Mandelbrot/README.md
index 57131463..a09cfeed 100644
--- a/Samples/5_Domain_Specific/Mandelbrot/README.md
+++ b/Samples/5_Domain_Specific/Mandelbrot/README.md
@@ -10,7 +10,7 @@ Graphics Interop, Data Parallel Algorithms
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaGraphicsMapResources, cudaGraphicsUnregisterResource, cudaGraphicsGLRegisterBuffer, cudaGLUnregisterBufferObject, cudaGraphicsResourceGetMappedPointer, cudaGLUnmapBufferObject, cudaDeviceSynchronize, cudaGLMapBufferObject, cudaMalloc, cudaGLRegisterBufferObject, cudaGraphicsUnmapResources, cudaMemcpy, cudaGetDeviceProperties
+cudaGLUnmapBufferObject, cudaGraphicsUnmapResources, cudaMemcpy, cudaFree, cudaGLMapBufferObject, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaGLUnregisterBufferObject, cudaDeviceSynchronize, cudaGLRegisterBufferObject, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [X11](../../../README.md#x11), [GL](../../../README.md#gl)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/Mandelbrot/findgllib.mk b/Samples/5_Domain_Specific/Mandelbrot/findgllib.mk
index f0a5c551..998fcf0f 100644
--- a/Samples/5_Domain_Specific/Mandelbrot/findgllib.mk
+++ b/Samples/5_Domain_Specific/Mandelbrot/findgllib.mk
@@ -53,11 +53,12 @@ endif
 ifeq ("$(TARGET_OS)","linux")
     # $(info) >> findgllib.mk -> LINUX path <<<)
     # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside
-    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu      >/dev/null 2>&1; echo $$?)
-    FEDORA = $(shell echo $(DISTRO) | grep -i fedora      >/dev/null 2>&1; echo $$?)
-    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
-    CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
+    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu       >/dev/null 2>&1; echo $$?)
+    FEDORA = $(shell echo $(DISTRO) | grep -i fedora       >/dev/null 2>&1; echo $$?)
+    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel'  >/dev/null 2>&1; echo $$?)
+    CENTOS = $(shell echo $(DISTRO) | grep -i centos       >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux")
         DFLT_PATH ?= /usr/lib
       endif
     endif
+
     ifeq ("$(SUSE)","0")
       GLPATH    ?= /usr/X11R6/lib64
       GLLINK    ?= -L/usr/X11R6/lib64
       DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(FEDORA)","0")
+    else 
       GLPATH    ?= /usr/lib64/nvidia
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
-    ifeq ("$(RHEL)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(CENTOS)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-  
+      
   # find libGL, libGLU 
   GLLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so  -print 2>/dev/null)
   GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null)
diff --git a/Samples/5_Domain_Specific/MonteCarloMultiGPU/Makefile b/Samples/5_Domain_Specific/MonteCarloMultiGPU/Makefile
index 503c9678..c38f7e44 100644
--- a/Samples/5_Domain_Specific/MonteCarloMultiGPU/Makefile
+++ b/Samples/5_Domain_Specific/MonteCarloMultiGPU/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/MonteCarloMultiGPU/MonteCarloMultiGPU_vs2017.vcxproj b/Samples/5_Domain_Specific/MonteCarloMultiGPU/MonteCarloMultiGPU_vs2017.vcxproj
index f37ca9a9..3330def6 100644
--- a/Samples/5_Domain_Specific/MonteCarloMultiGPU/MonteCarloMultiGPU_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/MonteCarloMultiGPU/MonteCarloMultiGPU_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/MonteCarloMultiGPU.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -113,6 +113,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/MonteCarloMultiGPU/MonteCarloMultiGPU_vs2019.vcxproj b/Samples/5_Domain_Specific/MonteCarloMultiGPU/MonteCarloMultiGPU_vs2019.vcxproj
index 9c88c3f3..cf2d4ad2 100644
--- a/Samples/5_Domain_Specific/MonteCarloMultiGPU/MonteCarloMultiGPU_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/MonteCarloMultiGPU/MonteCarloMultiGPU_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/MonteCarloMultiGPU.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -109,6 +109,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/MonteCarloMultiGPU/MonteCarloMultiGPU_vs2022.vcxproj b/Samples/5_Domain_Specific/MonteCarloMultiGPU/MonteCarloMultiGPU_vs2022.vcxproj
index a047f1c0..852394d8 100644
--- a/Samples/5_Domain_Specific/MonteCarloMultiGPU/MonteCarloMultiGPU_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/MonteCarloMultiGPU/MonteCarloMultiGPU_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/MonteCarloMultiGPU.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -109,6 +109,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/MonteCarloMultiGPU/NsightEclipse.xml b/Samples/5_Domain_Specific/MonteCarloMultiGPU/NsightEclipse.xml
index 34d2d8e0..bbe55936 100644
--- a/Samples/5_Domain_Specific/MonteCarloMultiGPU/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/MonteCarloMultiGPU/NsightEclipse.xml
@@ -3,23 +3,23 @@
 <entry>
   <name>MonteCarloMultiGPU</name>
   <cuda_api_list>
-    <toolkit>cudaMemset</toolkit>
-    <toolkit>cudaFree</toolkit>
     <toolkit>cudaStreamDestroy</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
-    <toolkit>cudaMallocHost</toolkit>
-    <toolkit>cudaStreamCreate</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaGetDeviceCount</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaEventSynchronize</toolkit>
-    <toolkit>cudaFreeHost</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaEventDestroy</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaMallocHost</toolkit>
     <toolkit>cudaSetDevice</toolkit>
-    <toolkit>cudaMemcpyAsync</toolkit>
-    <toolkit>cudaStreamSynchronize</toolkit>
+    <toolkit>cudaEventSynchronize</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
+    <toolkit>cudaFreeHost</toolkit>
+    <toolkit>cudaMemset</toolkit>
+    <toolkit>cudaStreamSynchronize</toolkit>
+    <toolkit>cudaEventDestroy</toolkit>
+    <toolkit>cudaMemcpyAsync</toolkit>
+    <toolkit>cudaStreamCreate</toolkit>
+    <toolkit>cudaGetDeviceCount</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample evaluates fair call price for a given set of European options using the Monte Carlo approach, taking advantage of all CUDA-capable GPUs installed in the system. This sample use double precision hardware if a GTX 200 class GPU is present.  The sample also takes advantage of CUDA 4.0 capability to supporting using a single CPU thread to control multiple GPUs]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -69,6 +69,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/5_Domain_Specific/MonteCarloMultiGPU/README.md b/Samples/5_Domain_Specific/MonteCarloMultiGPU/README.md
index 7a0f77cb..5eff98b7 100644
--- a/Samples/5_Domain_Specific/MonteCarloMultiGPU/README.md
+++ b/Samples/5_Domain_Specific/MonteCarloMultiGPU/README.md
@@ -10,7 +10,7 @@ Random Number Generator, Computational Finance, CURAND Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemset, cudaFree, cudaStreamDestroy, cudaEventRecord, cudaMallocHost, cudaStreamCreate, cudaEventCreate, cudaGetDeviceCount, cudaDeviceSynchronize, cudaEventSynchronize, cudaFreeHost, cudaMalloc, cudaEventDestroy, cudaSetDevice, cudaMemcpyAsync, cudaStreamSynchronize, cudaGetDeviceProperties
+cudaStreamDestroy, cudaMalloc, cudaFree, cudaMallocHost, cudaSetDevice, cudaEventSynchronize, cudaGetDeviceProperties, cudaDeviceSynchronize, cudaEventRecord, cudaFreeHost, cudaMemset, cudaStreamSynchronize, cudaEventDestroy, cudaMemcpyAsync, cudaStreamCreate, cudaGetDeviceCount, cudaEventCreate
 
 ## Dependencies needed to build/run
 [CURAND](../../../README.md#curand)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/NV12toBGRandResize/Makefile b/Samples/5_Domain_Specific/NV12toBGRandResize/Makefile
index cb4fefd6..f0ea1f7c 100644
--- a/Samples/5_Domain_Specific/NV12toBGRandResize/Makefile
+++ b/Samples/5_Domain_Specific/NV12toBGRandResize/Makefile
@@ -285,9 +285,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/NV12toBGRandResize/NV12toBGRandResize_vs2017.vcxproj b/Samples/5_Domain_Specific/NV12toBGRandResize/NV12toBGRandResize_vs2017.vcxproj
index fe9e7d3b..845bded6 100644
--- a/Samples/5_Domain_Specific/NV12toBGRandResize/NV12toBGRandResize_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/NV12toBGRandResize/NV12toBGRandResize_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/NV12toBGRandResize.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -112,6 +112,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/NV12toBGRandResize/NV12toBGRandResize_vs2019.vcxproj b/Samples/5_Domain_Specific/NV12toBGRandResize/NV12toBGRandResize_vs2019.vcxproj
index b4c0cf63..88c92e39 100644
--- a/Samples/5_Domain_Specific/NV12toBGRandResize/NV12toBGRandResize_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/NV12toBGRandResize/NV12toBGRandResize_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/NV12toBGRandResize.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/NV12toBGRandResize/NV12toBGRandResize_vs2022.vcxproj b/Samples/5_Domain_Specific/NV12toBGRandResize/NV12toBGRandResize_vs2022.vcxproj
index 290d571c..09b89f94 100644
--- a/Samples/5_Domain_Specific/NV12toBGRandResize/NV12toBGRandResize_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/NV12toBGRandResize/NV12toBGRandResize_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/NV12toBGRandResize.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/NV12toBGRandResize/NsightEclipse.xml b/Samples/5_Domain_Specific/NV12toBGRandResize/NsightEclipse.xml
index 6cd41314..cec28308 100644
--- a/Samples/5_Domain_Specific/NV12toBGRandResize/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/NV12toBGRandResize/NsightEclipse.xml
@@ -3,21 +3,21 @@
 <entry>
   <name>NV12toBGRandResize</name>
   <cuda_api_list>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaStreamDestroy</toolkit>
+    <toolkit>cudaMalloc</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
     <toolkit>cudaMallocManaged</toolkit>
-    <toolkit>cudaStreamCreate</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaStreamAttachMemAsync</toolkit>
     <toolkit>cudaDestroyTextureObject</toolkit>
     <toolkit>cudaEventSynchronize</toolkit>
-    <toolkit>cudaStreamAttachMemAsync</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaCreateTextureObject</toolkit>
-    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
     <toolkit>cudaEventDestroy</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
+    <toolkit>cudaStreamCreate</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[This code shows two ways to convert and resize NV12 frames to BGR 3 planars frames using CUDA in batch. Way-1, Convert NV12 Input to BGR @ Input Resolution-1, then Resize to Resolution#2. Way-2, resize NV12 Input to Resolution#2 then convert it to BGR Output. NVIDIA HW Decoder, both dGPU and Tegra, normally outputs NV12 pitch format frames. For the inference using TensorRT, the input frame needs to be BGR planar format with possibly different size. So, conversion and resizing from NV12 to BGR planar is usually required for the inference following decoding. This CUDA code provides a reference implementation for conversion and resizing.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -58,6 +58,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/5_Domain_Specific/NV12toBGRandResize/README.md b/Samples/5_Domain_Specific/NV12toBGRandResize/README.md
index 738dd5b3..f7899913 100644
--- a/Samples/5_Domain_Specific/NV12toBGRandResize/README.md
+++ b/Samples/5_Domain_Specific/NV12toBGRandResize/README.md
@@ -10,7 +10,7 @@ Graphics Interop, Image Processing, Video Processing
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaStreamDestroy, cudaFree, cudaEventRecord, cudaMallocManaged, cudaStreamCreate, cudaEventCreate, cudaEventElapsedTime, cudaDeviceSynchronize, cudaDestroyTextureObject, cudaEventSynchronize, cudaStreamAttachMemAsync, cudaCreateTextureObject, cudaMalloc, cudaEventDestroy, cudaMemcpy
+cudaMemcpy, cudaStreamDestroy, cudaMalloc, cudaFree, cudaMallocManaged, cudaStreamAttachMemAsync, cudaDestroyTextureObject, cudaEventSynchronize, cudaDeviceSynchronize, cudaCreateTextureObject, cudaEventRecord, cudaEventDestroy, cudaEventElapsedTime, cudaStreamCreate, cudaEventCreate
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/5_Domain_Specific/SLID3D10Texture/README.md b/Samples/5_Domain_Specific/SLID3D10Texture/README.md
index 0c62e600..ddd18f04 100644
--- a/Samples/5_Domain_Specific/SLID3D10Texture/README.md
+++ b/Samples/5_Domain_Specific/SLID3D10Texture/README.md
@@ -10,7 +10,7 @@ Performance Strategies, Graphics Interop, Image Processing, 2D Textures
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -26,14 +26,14 @@ x86_64
 cuCtxPushCurrent, cuCtxPopCurrent
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemset, cudaFree, cudaGraphicsMapResources, cudaGraphicsUnregisterResource, cudaGraphicsResourceSetMapFlags, cudaGetDeviceCount, cudaMallocPitch, cudaMalloc, cudaGraphicsSubResourceGetMappedArray, cudaGetLastError, cudaGraphicsUnmapResources, cudaGetErrorString, cudaGetDeviceProperties
+cudaGraphicsUnmapResources, cudaMalloc, cudaMallocPitch, cudaGetErrorString, cudaFree, cudaGraphicsResourceSetMapFlags, cudaGetLastError, cudaGraphicsMapResources, cudaGetDeviceCount, cudaMemset, cudaGraphicsUnregisterResource, cudaGraphicsSubResourceGetMappedArray, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [DirectX](../../../README.md#directx)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/SLID3D10Texture/SLID3D10Texture_vs2017.vcxproj b/Samples/5_Domain_Specific/SLID3D10Texture/SLID3D10Texture_vs2017.vcxproj
index 4e8e897e..3b3d82ca 100644
--- a/Samples/5_Domain_Specific/SLID3D10Texture/SLID3D10Texture_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/SLID3D10Texture/SLID3D10Texture_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/SLID3D10Texture.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -109,6 +109,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/SLID3D10Texture/SLID3D10Texture_vs2019.vcxproj b/Samples/5_Domain_Specific/SLID3D10Texture/SLID3D10Texture_vs2019.vcxproj
index a7944ca5..96c3165a 100644
--- a/Samples/5_Domain_Specific/SLID3D10Texture/SLID3D10Texture_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/SLID3D10Texture/SLID3D10Texture_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/SLID3D10Texture.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/SLID3D10Texture/SLID3D10Texture_vs2022.vcxproj b/Samples/5_Domain_Specific/SLID3D10Texture/SLID3D10Texture_vs2022.vcxproj
index b0a7dccf..e26cc9a4 100644
--- a/Samples/5_Domain_Specific/SLID3D10Texture/SLID3D10Texture_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/SLID3D10Texture/SLID3D10Texture_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/SLID3D10Texture.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/SobelFilter/Makefile b/Samples/5_Domain_Specific/SobelFilter/Makefile
index bc7f79a2..16893f8c 100644
--- a/Samples/5_Domain_Specific/SobelFilter/Makefile
+++ b/Samples/5_Domain_Specific/SobelFilter/Makefile
@@ -299,9 +299,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/SobelFilter/NsightEclipse.xml b/Samples/5_Domain_Specific/SobelFilter/NsightEclipse.xml
index 48e1cb0d..1409139c 100644
--- a/Samples/5_Domain_Specific/SobelFilter/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/SobelFilter/NsightEclipse.xml
@@ -3,20 +3,20 @@
 <entry>
   <name>SobelFilter</name>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaGraphicsMapResources</toolkit>
-    <toolkit>cudaFreeArray</toolkit>
-    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
-    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaDestroyTextureObject</toolkit>
-    <toolkit>cudaCreateTextureObject</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaMallocArray</toolkit>
-    <toolkit>cudaGraphicsUnregisterResource</toolkit>
     <toolkit>cudaGraphicsUnmapResources</toolkit>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaMallocArray</toolkit>
+    <toolkit>cudaFreeArray</toolkit>
+    <toolkit>cudaFree</toolkit>
     <toolkit>cudaGetErrorString</toolkit>
+    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
+    <toolkit>cudaGraphicsMapResources</toolkit>
+    <toolkit>cudaDestroyTextureObject</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaCreateTextureObject</toolkit>
+    <toolkit>cudaGraphicsUnregisterResource</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample implements the Sobel edge detection filter for 8-bit monochrome images.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -89,6 +89,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/5_Domain_Specific/SobelFilter/README.md b/Samples/5_Domain_Specific/SobelFilter/README.md
index 2c8b2564..f33e8df7 100644
--- a/Samples/5_Domain_Specific/SobelFilter/README.md
+++ b/Samples/5_Domain_Specific/SobelFilter/README.md
@@ -10,7 +10,7 @@ Graphics Interop, Image Processing
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaGraphicsMapResources, cudaFreeArray, cudaGraphicsGLRegisterBuffer, cudaGraphicsResourceGetMappedPointer, cudaDeviceSynchronize, cudaDestroyTextureObject, cudaCreateTextureObject, cudaMalloc, cudaMallocArray, cudaGraphicsUnregisterResource, cudaGraphicsUnmapResources, cudaMemcpy, cudaGetErrorString
+cudaGraphicsUnmapResources, cudaMemcpy, cudaMallocArray, cudaFreeArray, cudaFree, cudaGetErrorString, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDestroyTextureObject, cudaDeviceSynchronize, cudaCreateTextureObject, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer
 
 ## Dependencies needed to build/run
 [X11](../../../README.md#x11), [GL](../../../README.md#gl)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/SobelFilter/SobelFilter_vs2017.vcxproj b/Samples/5_Domain_Specific/SobelFilter/SobelFilter_vs2017.vcxproj
index 4914918d..bb26dfe6 100644
--- a/Samples/5_Domain_Specific/SobelFilter/SobelFilter_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/SobelFilter/SobelFilter_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/SobelFilter.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -118,6 +118,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/SobelFilter/SobelFilter_vs2019.vcxproj b/Samples/5_Domain_Specific/SobelFilter/SobelFilter_vs2019.vcxproj
index 6bef3010..8dcd7a83 100644
--- a/Samples/5_Domain_Specific/SobelFilter/SobelFilter_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/SobelFilter/SobelFilter_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/SobelFilter.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -114,6 +114,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/SobelFilter/SobelFilter_vs2022.vcxproj b/Samples/5_Domain_Specific/SobelFilter/SobelFilter_vs2022.vcxproj
index cffc5757..d9f50ec3 100644
--- a/Samples/5_Domain_Specific/SobelFilter/SobelFilter_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/SobelFilter/SobelFilter_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/SobelFilter.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -114,6 +114,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/SobelFilter/findgllib.mk b/Samples/5_Domain_Specific/SobelFilter/findgllib.mk
index f0a5c551..998fcf0f 100644
--- a/Samples/5_Domain_Specific/SobelFilter/findgllib.mk
+++ b/Samples/5_Domain_Specific/SobelFilter/findgllib.mk
@@ -53,11 +53,12 @@ endif
 ifeq ("$(TARGET_OS)","linux")
     # $(info) >> findgllib.mk -> LINUX path <<<)
     # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside
-    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu      >/dev/null 2>&1; echo $$?)
-    FEDORA = $(shell echo $(DISTRO) | grep -i fedora      >/dev/null 2>&1; echo $$?)
-    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
-    CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
+    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu       >/dev/null 2>&1; echo $$?)
+    FEDORA = $(shell echo $(DISTRO) | grep -i fedora       >/dev/null 2>&1; echo $$?)
+    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel'  >/dev/null 2>&1; echo $$?)
+    CENTOS = $(shell echo $(DISTRO) | grep -i centos       >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux")
         DFLT_PATH ?= /usr/lib
       endif
     endif
+
     ifeq ("$(SUSE)","0")
       GLPATH    ?= /usr/X11R6/lib64
       GLLINK    ?= -L/usr/X11R6/lib64
       DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(FEDORA)","0")
+    else
       GLPATH    ?= /usr/lib64/nvidia
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
-    ifeq ("$(RHEL)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(CENTOS)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-  
+
   # find libGL, libGLU 
   GLLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so  -print 2>/dev/null)
   GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null)
diff --git a/Samples/5_Domain_Specific/SobolQRNG/Makefile b/Samples/5_Domain_Specific/SobolQRNG/Makefile
index 080a5c7c..7d80c57e 100644
--- a/Samples/5_Domain_Specific/SobolQRNG/Makefile
+++ b/Samples/5_Domain_Specific/SobolQRNG/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/SobolQRNG/NsightEclipse.xml b/Samples/5_Domain_Specific/SobolQRNG/NsightEclipse.xml
index d12d97cc..cddf025f 100644
--- a/Samples/5_Domain_Specific/SobolQRNG/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/SobolQRNG/NsightEclipse.xml
@@ -3,13 +3,13 @@
 <entry>
   <name>SobolQRNG</name>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaMalloc</toolkit>
     <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaGetErrorString</toolkit>
-    <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaGetDevice</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample implements Sobol Quasirandom Sequence Generator.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -50,6 +50,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/5_Domain_Specific/SobolQRNG/README.md b/Samples/5_Domain_Specific/SobolQRNG/README.md
index 13789f59..72bb3f84 100644
--- a/Samples/5_Domain_Specific/SobolQRNG/README.md
+++ b/Samples/5_Domain_Specific/SobolQRNG/README.md
@@ -10,7 +10,7 @@ Computational Finance
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties, cudaGetDevice
+cudaMemcpy, cudaGetErrorString, cudaFree, cudaDeviceSynchronize, cudaGetDevice, cudaMalloc, cudaGetDeviceProperties
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/5_Domain_Specific/SobolQRNG/SobolQRNG_vs2017.vcxproj b/Samples/5_Domain_Specific/SobolQRNG/SobolQRNG_vs2017.vcxproj
index ba88ef6a..2dc8daa7 100644
--- a/Samples/5_Domain_Specific/SobolQRNG/SobolQRNG_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/SobolQRNG/SobolQRNG_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/SobolQRNG.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -113,6 +113,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/SobolQRNG/SobolQRNG_vs2019.vcxproj b/Samples/5_Domain_Specific/SobolQRNG/SobolQRNG_vs2019.vcxproj
index acb7b91a..1f741088 100644
--- a/Samples/5_Domain_Specific/SobolQRNG/SobolQRNG_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/SobolQRNG/SobolQRNG_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/SobolQRNG.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -109,6 +109,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/SobolQRNG/SobolQRNG_vs2022.vcxproj b/Samples/5_Domain_Specific/SobolQRNG/SobolQRNG_vs2022.vcxproj
index 26344f69..d54706be 100644
--- a/Samples/5_Domain_Specific/SobolQRNG/SobolQRNG_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/SobolQRNG/SobolQRNG_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/SobolQRNG.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -109,6 +109,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/VFlockingD3D10/README.md b/Samples/5_Domain_Specific/VFlockingD3D10/README.md
index 0db3f2f8..613b5663 100644
--- a/Samples/5_Domain_Specific/VFlockingD3D10/README.md
+++ b/Samples/5_Domain_Specific/VFlockingD3D10/README.md
@@ -10,7 +10,7 @@ Graphics Interop, Data Parallel Algorithms, Physically-Based Simulation, Perform
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemset, cudaFree, cudaGraphicsMapResources, cudaEventRecord, cudaGraphicsUnregisterResource, cudaEventCreate, cudaGraphicsResourceGetMappedPointer, cudaGetDeviceCount, cudaEventElapsedTime, cudaDeviceSynchronize, cudaEventSynchronize, cudaGraphicsResourceSetMapFlags, cudaMalloc, cudaGetLastError, cudaGraphicsUnmapResources, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties
+cudaGraphicsUnmapResources, cudaMemcpy, cudaFree, cudaGetErrorString, cudaGraphicsResourceSetMapFlags, cudaGraphicsResourceGetMappedPointer, cudaGetLastError, cudaGraphicsMapResources, cudaEventSynchronize, cudaGetDeviceProperties, cudaDeviceSynchronize, cudaEventRecord, cudaMemset, cudaGraphicsUnregisterResource, cudaMalloc, cudaEventElapsedTime, cudaGetDeviceCount, cudaEventCreate
 
 ## Dependencies needed to build/run
 [DirectX](../../../README.md#directx)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/VFlockingD3D10/VFlockingD3D10_vs2017.vcxproj b/Samples/5_Domain_Specific/VFlockingD3D10/VFlockingD3D10_vs2017.vcxproj
index 37fe522b..7744ff26 100644
--- a/Samples/5_Domain_Specific/VFlockingD3D10/VFlockingD3D10_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/VFlockingD3D10/VFlockingD3D10_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/VFlockingD3D10.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -110,6 +110,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/VFlockingD3D10/VFlockingD3D10_vs2019.vcxproj b/Samples/5_Domain_Specific/VFlockingD3D10/VFlockingD3D10_vs2019.vcxproj
index de8440f7..3c776cc9 100644
--- a/Samples/5_Domain_Specific/VFlockingD3D10/VFlockingD3D10_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/VFlockingD3D10/VFlockingD3D10_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/VFlockingD3D10.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -106,6 +106,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/VFlockingD3D10/VFlockingD3D10_vs2022.vcxproj b/Samples/5_Domain_Specific/VFlockingD3D10/VFlockingD3D10_vs2022.vcxproj
index 0213cd64..65635ece 100644
--- a/Samples/5_Domain_Specific/VFlockingD3D10/VFlockingD3D10_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/VFlockingD3D10/VFlockingD3D10_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/VFlockingD3D10.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -106,6 +106,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/bicubicTexture/Makefile b/Samples/5_Domain_Specific/bicubicTexture/Makefile
index 2e3a5627..cc6c2680 100644
--- a/Samples/5_Domain_Specific/bicubicTexture/Makefile
+++ b/Samples/5_Domain_Specific/bicubicTexture/Makefile
@@ -299,9 +299,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/bicubicTexture/NsightEclipse.xml b/Samples/5_Domain_Specific/bicubicTexture/NsightEclipse.xml
index b9f435b0..bb42ff84 100644
--- a/Samples/5_Domain_Specific/bicubicTexture/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/bicubicTexture/NsightEclipse.xml
@@ -3,20 +3,20 @@
 <entry>
   <name>bicubicTexture</name>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaGraphicsMapResources</toolkit>
-    <toolkit>cudaFreeArray</toolkit>
-    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
-    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaDestroyTextureObject</toolkit>
-    <toolkit>cudaCreateTextureObject</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaMallocArray</toolkit>
-    <toolkit>cudaCreateChannelDesc</toolkit>
-    <toolkit>cudaGraphicsUnregisterResource</toolkit>
     <toolkit>cudaGraphicsUnmapResources</toolkit>
+    <toolkit>cudaCreateChannelDesc</toolkit>
+    <toolkit>cudaMallocArray</toolkit>
+    <toolkit>cudaFreeArray</toolkit>
+    <toolkit>cudaFree</toolkit>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
+    <toolkit>cudaGraphicsMapResources</toolkit>
+    <toolkit>cudaDestroyTextureObject</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaCreateTextureObject</toolkit>
+    <toolkit>cudaGraphicsUnregisterResource</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates how to efficiently implement a Bicubic B-spline interpolation filter with CUDA texture.]]></description>
@@ -86,6 +86,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/5_Domain_Specific/bicubicTexture/README.md b/Samples/5_Domain_Specific/bicubicTexture/README.md
index db1fc40f..c972ebda 100644
--- a/Samples/5_Domain_Specific/bicubicTexture/README.md
+++ b/Samples/5_Domain_Specific/bicubicTexture/README.md
@@ -10,7 +10,7 @@ Graphics Interop, Image Processing
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaGraphicsMapResources, cudaFreeArray, cudaGraphicsGLRegisterBuffer, cudaGraphicsResourceGetMappedPointer, cudaDeviceSynchronize, cudaDestroyTextureObject, cudaCreateTextureObject, cudaMalloc, cudaMallocArray, cudaCreateChannelDesc, cudaGraphicsUnregisterResource, cudaGraphicsUnmapResources, cudaMemcpy, cudaGetDeviceProperties
+cudaGraphicsUnmapResources, cudaCreateChannelDesc, cudaMallocArray, cudaFreeArray, cudaFree, cudaMemcpy, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDestroyTextureObject, cudaDeviceSynchronize, cudaCreateTextureObject, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [X11](../../../README.md#x11), [GL](../../../README.md#gl)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/bicubicTexture/bicubicTexture_vs2017.vcxproj b/Samples/5_Domain_Specific/bicubicTexture/bicubicTexture_vs2017.vcxproj
index f7c07a0e..11e527bb 100644
--- a/Samples/5_Domain_Specific/bicubicTexture/bicubicTexture_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/bicubicTexture/bicubicTexture_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/bicubicTexture.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -118,6 +118,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/bicubicTexture/bicubicTexture_vs2019.vcxproj b/Samples/5_Domain_Specific/bicubicTexture/bicubicTexture_vs2019.vcxproj
index 9a4ebd55..dbb64abb 100644
--- a/Samples/5_Domain_Specific/bicubicTexture/bicubicTexture_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/bicubicTexture/bicubicTexture_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/bicubicTexture.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -114,6 +114,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/bicubicTexture/bicubicTexture_vs2022.vcxproj b/Samples/5_Domain_Specific/bicubicTexture/bicubicTexture_vs2022.vcxproj
index 9bb627c0..a54bf6b9 100644
--- a/Samples/5_Domain_Specific/bicubicTexture/bicubicTexture_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/bicubicTexture/bicubicTexture_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/bicubicTexture.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -114,6 +114,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/bicubicTexture/data/teapot512.pgm b/Samples/5_Domain_Specific/bicubicTexture/data/teapot512.pgm
index 23db9b9a..4e5ca459 100644
Binary files a/Samples/5_Domain_Specific/bicubicTexture/data/teapot512.pgm and b/Samples/5_Domain_Specific/bicubicTexture/data/teapot512.pgm differ
diff --git a/Samples/5_Domain_Specific/bicubicTexture/findgllib.mk b/Samples/5_Domain_Specific/bicubicTexture/findgllib.mk
index f0a5c551..998fcf0f 100644
--- a/Samples/5_Domain_Specific/bicubicTexture/findgllib.mk
+++ b/Samples/5_Domain_Specific/bicubicTexture/findgllib.mk
@@ -53,11 +53,12 @@ endif
 ifeq ("$(TARGET_OS)","linux")
     # $(info) >> findgllib.mk -> LINUX path <<<)
     # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside
-    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu      >/dev/null 2>&1; echo $$?)
-    FEDORA = $(shell echo $(DISTRO) | grep -i fedora      >/dev/null 2>&1; echo $$?)
-    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
-    CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
+    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu       >/dev/null 2>&1; echo $$?)
+    FEDORA = $(shell echo $(DISTRO) | grep -i fedora       >/dev/null 2>&1; echo $$?)
+    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel'  >/dev/null 2>&1; echo $$?)
+    CENTOS = $(shell echo $(DISTRO) | grep -i centos       >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux")
         DFLT_PATH ?= /usr/lib
       endif
     endif
+
     ifeq ("$(SUSE)","0")
       GLPATH    ?= /usr/X11R6/lib64
       GLLINK    ?= -L/usr/X11R6/lib64
       DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(FEDORA)","0")
+    else 
       GLPATH    ?= /usr/lib64/nvidia
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
-    ifeq ("$(RHEL)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(CENTOS)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-  
+      
   # find libGL, libGLU 
   GLLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so  -print 2>/dev/null)
   GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null)
diff --git a/Samples/5_Domain_Specific/bilateralFilter/Makefile b/Samples/5_Domain_Specific/bilateralFilter/Makefile
index 800dfc13..2a4ee06b 100644
--- a/Samples/5_Domain_Specific/bilateralFilter/Makefile
+++ b/Samples/5_Domain_Specific/bilateralFilter/Makefile
@@ -299,9 +299,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/bilateralFilter/NsightEclipse.xml b/Samples/5_Domain_Specific/bilateralFilter/NsightEclipse.xml
index 8ca67bd8..b9e13ffa 100644
--- a/Samples/5_Domain_Specific/bilateralFilter/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/bilateralFilter/NsightEclipse.xml
@@ -3,18 +3,18 @@
 <entry>
   <name>bilateralFilter</name>
   <cuda_api_list>
-    <toolkit>cudaMemcpyToSymbol</toolkit>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaGraphicsMapResources</toolkit>
-    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
-    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaDestroyTextureObject</toolkit>
-    <toolkit>cudaMallocPitch</toolkit>
-    <toolkit>cudaCreateTextureObject</toolkit>
-    <toolkit>cudaGraphicsUnregisterResource</toolkit>
-    <toolkit>cudaGraphicsUnmapResources</toolkit>
     <toolkit>cudaRuntimeGetVersion</toolkit>
+    <toolkit>cudaGraphicsUnmapResources</toolkit>
+    <toolkit>cudaMallocPitch</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
+    <toolkit>cudaGraphicsMapResources</toolkit>
+    <toolkit>cudaDestroyTextureObject</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaCreateTextureObject</toolkit>
+    <toolkit>cudaMemcpyToSymbol</toolkit>
+    <toolkit>cudaGraphicsUnregisterResource</toolkit>
+    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[Bilateral filter is an edge-preserving non-linear smoothing filter that is implemented with CUDA with OpenGL rendering. It can be used in image recovery and denoising. Each pixel is weight by considering both the spatial distance and color distance between its neighbors. Reference:"C. Tomasi, R. Manduchi, Bilateral Filtering for Gray and Color Images, proceeding of the ICCV, 1998, http://users.soe.ucsc.edu/~manduchi/Papers/ICCV98.pdf"]]></description>
@@ -86,6 +86,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/5_Domain_Specific/bilateralFilter/README.md b/Samples/5_Domain_Specific/bilateralFilter/README.md
index 8741c589..b31f086f 100644
--- a/Samples/5_Domain_Specific/bilateralFilter/README.md
+++ b/Samples/5_Domain_Specific/bilateralFilter/README.md
@@ -10,7 +10,7 @@ Graphics Interop, Image Processing
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemcpyToSymbol, cudaFree, cudaGraphicsMapResources, cudaGraphicsGLRegisterBuffer, cudaGraphicsResourceGetMappedPointer, cudaDeviceSynchronize, cudaDestroyTextureObject, cudaMallocPitch, cudaCreateTextureObject, cudaGraphicsUnregisterResource, cudaGraphicsUnmapResources, cudaRuntimeGetVersion, cudaGetDeviceProperties
+cudaRuntimeGetVersion, cudaGraphicsUnmapResources, cudaMallocPitch, cudaFree, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDestroyTextureObject, cudaDeviceSynchronize, cudaCreateTextureObject, cudaMemcpyToSymbol, cudaGraphicsUnregisterResource, cudaGraphicsGLRegisterBuffer, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [X11](../../../README.md#x11), [GL](../../../README.md#gl)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/bilateralFilter/bilateralFilter_vs2017.vcxproj b/Samples/5_Domain_Specific/bilateralFilter/bilateralFilter_vs2017.vcxproj
index 5a64d5c9..66d5cb16 100644
--- a/Samples/5_Domain_Specific/bilateralFilter/bilateralFilter_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/bilateralFilter/bilateralFilter_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/bilateralFilter.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -120,6 +120,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/bilateralFilter/bilateralFilter_vs2019.vcxproj b/Samples/5_Domain_Specific/bilateralFilter/bilateralFilter_vs2019.vcxproj
index 23174936..90816212 100644
--- a/Samples/5_Domain_Specific/bilateralFilter/bilateralFilter_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/bilateralFilter/bilateralFilter_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/bilateralFilter.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -116,6 +116,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/bilateralFilter/bilateralFilter_vs2022.vcxproj b/Samples/5_Domain_Specific/bilateralFilter/bilateralFilter_vs2022.vcxproj
index 3a4c27a8..8f7f94ad 100644
--- a/Samples/5_Domain_Specific/bilateralFilter/bilateralFilter_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/bilateralFilter/bilateralFilter_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/bilateralFilter.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -116,6 +116,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/bilateralFilter/findgllib.mk b/Samples/5_Domain_Specific/bilateralFilter/findgllib.mk
index f0a5c551..998fcf0f 100644
--- a/Samples/5_Domain_Specific/bilateralFilter/findgllib.mk
+++ b/Samples/5_Domain_Specific/bilateralFilter/findgllib.mk
@@ -53,11 +53,12 @@ endif
 ifeq ("$(TARGET_OS)","linux")
     # $(info) >> findgllib.mk -> LINUX path <<<)
     # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside
-    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu      >/dev/null 2>&1; echo $$?)
-    FEDORA = $(shell echo $(DISTRO) | grep -i fedora      >/dev/null 2>&1; echo $$?)
-    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
-    CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
+    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu       >/dev/null 2>&1; echo $$?)
+    FEDORA = $(shell echo $(DISTRO) | grep -i fedora       >/dev/null 2>&1; echo $$?)
+    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel'  >/dev/null 2>&1; echo $$?)
+    CENTOS = $(shell echo $(DISTRO) | grep -i centos       >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux")
         DFLT_PATH ?= /usr/lib
       endif
     endif
+
     ifeq ("$(SUSE)","0")
       GLPATH    ?= /usr/X11R6/lib64
       GLLINK    ?= -L/usr/X11R6/lib64
       DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(FEDORA)","0")
+    else 
       GLPATH    ?= /usr/lib64/nvidia
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
-    ifeq ("$(RHEL)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(CENTOS)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-  
+      
   # find libGL, libGLU 
   GLLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so  -print 2>/dev/null)
   GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null)
diff --git a/Samples/5_Domain_Specific/binomialOptions/Makefile b/Samples/5_Domain_Specific/binomialOptions/Makefile
index 2c85e8ce..7d498db9 100644
--- a/Samples/5_Domain_Specific/binomialOptions/Makefile
+++ b/Samples/5_Domain_Specific/binomialOptions/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/binomialOptions/NsightEclipse.xml b/Samples/5_Domain_Specific/binomialOptions/NsightEclipse.xml
index 0730f13d..f5273e49 100644
--- a/Samples/5_Domain_Specific/binomialOptions/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/binomialOptions/NsightEclipse.xml
@@ -3,8 +3,8 @@
 <entry>
   <name>binomialOptions</name>
   <cuda_api_list>
-    <toolkit>cudaMemcpyToSymbol</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaMemcpyToSymbol</toolkit>
     <toolkit>cudaMemcpyFromSymbol</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample evaluates fair call price for a given set of European options under binomial model.]]></description>
@@ -46,6 +46,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/5_Domain_Specific/binomialOptions/README.md b/Samples/5_Domain_Specific/binomialOptions/README.md
index 869a40cf..574d7e1f 100644
--- a/Samples/5_Domain_Specific/binomialOptions/README.md
+++ b/Samples/5_Domain_Specific/binomialOptions/README.md
@@ -10,7 +10,7 @@ Computational Finance
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemcpyToSymbol, cudaDeviceSynchronize, cudaMemcpyFromSymbol
+cudaDeviceSynchronize, cudaMemcpyToSymbol, cudaMemcpyFromSymbol
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/5_Domain_Specific/binomialOptions/binomialOptions_vs2017.vcxproj b/Samples/5_Domain_Specific/binomialOptions/binomialOptions_vs2017.vcxproj
index a5a832ab..8416e033 100644
--- a/Samples/5_Domain_Specific/binomialOptions/binomialOptions_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/binomialOptions/binomialOptions_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/binomialOptions.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -110,6 +110,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/binomialOptions/binomialOptions_vs2019.vcxproj b/Samples/5_Domain_Specific/binomialOptions/binomialOptions_vs2019.vcxproj
index 95af1438..806fed73 100644
--- a/Samples/5_Domain_Specific/binomialOptions/binomialOptions_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/binomialOptions/binomialOptions_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/binomialOptions.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -106,6 +106,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/binomialOptions/binomialOptions_vs2022.vcxproj b/Samples/5_Domain_Specific/binomialOptions/binomialOptions_vs2022.vcxproj
index 7ff63d93..616a1dc6 100644
--- a/Samples/5_Domain_Specific/binomialOptions/binomialOptions_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/binomialOptions/binomialOptions_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/binomialOptions.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -106,6 +106,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/binomialOptions_nvrtc/README.md b/Samples/5_Domain_Specific/binomialOptions_nvrtc/README.md
index 67b0cf18..0d96c7ef 100644
--- a/Samples/5_Domain_Specific/binomialOptions_nvrtc/README.md
+++ b/Samples/5_Domain_Specific/binomialOptions_nvrtc/README.md
@@ -10,7 +10,7 @@ Computational Finance, Runtime Compilation
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,7 +23,7 @@ x86_64, ppc64le, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
-cuLaunchKernel, cuModuleGetGlobal, cuCtxSynchronize, cuMemcpyDtoH, cuModuleGetFunction, cuMemcpyHtoD
+cuMemcpyDtoH, cuLaunchKernel, cuMemcpyHtoD, cuModuleGetGlobal, cuCtxSynchronize, cuModuleGetFunction
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
 cudaBlockSize, cudaGridSize
@@ -33,7 +33,7 @@ cudaBlockSize, cudaGridSize
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_nvrtc_vs2017.vcxproj b/Samples/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_nvrtc_vs2017.vcxproj
index b5cd46de..4e020948 100644
--- a/Samples/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_nvrtc_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_nvrtc_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -112,6 +112,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_nvrtc_vs2019.vcxproj b/Samples/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_nvrtc_vs2019.vcxproj
index e79add30..153e2b1c 100644
--- a/Samples/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_nvrtc_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_nvrtc_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_nvrtc_vs2022.vcxproj b/Samples/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_nvrtc_vs2022.vcxproj
index 7dbfcf7a..d2720c08 100644
--- a/Samples/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_nvrtc_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_nvrtc_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/convolutionFFT2D/Makefile b/Samples/5_Domain_Specific/convolutionFFT2D/Makefile
index 35185f28..0528320a 100644
--- a/Samples/5_Domain_Specific/convolutionFFT2D/Makefile
+++ b/Samples/5_Domain_Specific/convolutionFFT2D/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/convolutionFFT2D/NsightEclipse.xml b/Samples/5_Domain_Specific/convolutionFFT2D/NsightEclipse.xml
index f197fa28..f8134055 100644
--- a/Samples/5_Domain_Specific/convolutionFFT2D/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/convolutionFFT2D/NsightEclipse.xml
@@ -3,13 +3,13 @@
 <entry>
   <name>convolutionFFT2D</name>
   <cuda_api_list>
-    <toolkit>cudaMemset</toolkit>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaDestroyTextureObject</toolkit>
-    <toolkit>cudaCreateTextureObject</toolkit>
-    <toolkit>cudaMalloc</toolkit>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaDestroyTextureObject</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaCreateTextureObject</toolkit>
+    <toolkit>cudaMemset</toolkit>
+    <toolkit>cudaMalloc</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates how 2D convolutions with very large kernel sizes can be efficiently implemented using FFT transformations.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -58,6 +58,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/5_Domain_Specific/convolutionFFT2D/README.md b/Samples/5_Domain_Specific/convolutionFFT2D/README.md
index 089b847d..0f8d5193 100644
--- a/Samples/5_Domain_Specific/convolutionFFT2D/README.md
+++ b/Samples/5_Domain_Specific/convolutionFFT2D/README.md
@@ -10,7 +10,7 @@ Image Processing, CUFFT Library
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemset, cudaFree, cudaDeviceSynchronize, cudaDestroyTextureObject, cudaCreateTextureObject, cudaMalloc, cudaMemcpy
+cudaMemcpy, cudaFree, cudaDestroyTextureObject, cudaDeviceSynchronize, cudaCreateTextureObject, cudaMemset, cudaMalloc
 
 ## Dependencies needed to build/run
 [CUFFT](../../../README.md#cufft)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D_vs2017.vcxproj b/Samples/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D_vs2017.vcxproj
index 997c7912..4fe20d85 100644
--- a/Samples/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/convolutionFFT2D.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -110,6 +110,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D_vs2019.vcxproj b/Samples/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D_vs2019.vcxproj
index 3156c87f..71bad451 100644
--- a/Samples/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/convolutionFFT2D.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -106,6 +106,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D_vs2022.vcxproj b/Samples/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D_vs2022.vcxproj
index 641a761c..a73a0ced 100644
--- a/Samples/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/convolutionFFT2D.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -106,6 +106,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/dwtHaar1D/Makefile b/Samples/5_Domain_Specific/dwtHaar1D/Makefile
index d24cac3f..abd407f4 100644
--- a/Samples/5_Domain_Specific/dwtHaar1D/Makefile
+++ b/Samples/5_Domain_Specific/dwtHaar1D/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/dwtHaar1D/NsightEclipse.xml b/Samples/5_Domain_Specific/dwtHaar1D/NsightEclipse.xml
index 386a5469..daa96121 100644
--- a/Samples/5_Domain_Specific/dwtHaar1D/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/dwtHaar1D/NsightEclipse.xml
@@ -4,8 +4,8 @@
   <name>dwtHaar1D</name>
   <cuda_api_list>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaFree</toolkit>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaFree</toolkit>
   </cuda_api_list>
   <description><![CDATA[Discrete Haar wavelet decomposition for 1D signals with a length which is a power of 2.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -46,6 +46,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/5_Domain_Specific/dwtHaar1D/README.md b/Samples/5_Domain_Specific/dwtHaar1D/README.md
index 6d4eb998..da368a4e 100644
--- a/Samples/5_Domain_Specific/dwtHaar1D/README.md
+++ b/Samples/5_Domain_Specific/dwtHaar1D/README.md
@@ -10,7 +10,7 @@ Image Processing, Video Compression
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMalloc, cudaFree, cudaMemcpy
+cudaMalloc, cudaMemcpy, cudaFree
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/5_Domain_Specific/dwtHaar1D/dwtHaar1D_vs2017.vcxproj b/Samples/5_Domain_Specific/dwtHaar1D/dwtHaar1D_vs2017.vcxproj
index 6592422c..6b693574 100644
--- a/Samples/5_Domain_Specific/dwtHaar1D/dwtHaar1D_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/dwtHaar1D/dwtHaar1D_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/dwtHaar1D.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/dwtHaar1D/dwtHaar1D_vs2019.vcxproj b/Samples/5_Domain_Specific/dwtHaar1D/dwtHaar1D_vs2019.vcxproj
index 9cd792c5..c191c4ff 100644
--- a/Samples/5_Domain_Specific/dwtHaar1D/dwtHaar1D_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/dwtHaar1D/dwtHaar1D_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/dwtHaar1D.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/dwtHaar1D/dwtHaar1D_vs2022.vcxproj b/Samples/5_Domain_Specific/dwtHaar1D/dwtHaar1D_vs2022.vcxproj
index 739fd5e6..bba596d8 100644
--- a/Samples/5_Domain_Specific/dwtHaar1D/dwtHaar1D_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/dwtHaar1D/dwtHaar1D_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/dwtHaar1D.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/dxtc/Makefile b/Samples/5_Domain_Specific/dxtc/Makefile
index 2de94434..b379d6c8 100644
--- a/Samples/5_Domain_Specific/dxtc/Makefile
+++ b/Samples/5_Domain_Specific/dxtc/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/dxtc/NsightEclipse.xml b/Samples/5_Domain_Specific/dxtc/NsightEclipse.xml
index f194bea9..a31d16cb 100644
--- a/Samples/5_Domain_Specific/dxtc/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/dxtc/NsightEclipse.xml
@@ -3,12 +3,12 @@
 <entry>
   <name>dxtc</name>
   <cuda_api_list>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaFree</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
-    <toolkit>cudaGetDeviceProperties</toolkit>
     <toolkit>cudaGetDevice</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[High Quality DXT Compression using CUDA. This example shows how to implement an existing computationally-intensive CPU compression algorithm in parallel on the GPU, and obtain an order of magnitude performance improvement.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -61,6 +61,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/5_Domain_Specific/dxtc/README.md b/Samples/5_Domain_Specific/dxtc/README.md
index f45d97ea..3a805668 100644
--- a/Samples/5_Domain_Specific/dxtc/README.md
+++ b/Samples/5_Domain_Specific/dxtc/README.md
@@ -10,7 +10,7 @@ Cooperative Groups, Image Processing, Image Compression
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaMemcpy, cudaGetDeviceProperties, cudaGetDevice
+cudaMemcpy, cudaFree, cudaDeviceSynchronize, cudaGetDevice, cudaMalloc, cudaGetDeviceProperties
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/5_Domain_Specific/dxtc/dxtc_vs2017.vcxproj b/Samples/5_Domain_Specific/dxtc/dxtc_vs2017.vcxproj
index c655b304..e1d68433 100644
--- a/Samples/5_Domain_Specific/dxtc/dxtc_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/dxtc/dxtc_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/dxtc.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -109,6 +109,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/dxtc/dxtc_vs2019.vcxproj b/Samples/5_Domain_Specific/dxtc/dxtc_vs2019.vcxproj
index 53567caa..c1b7f6db 100644
--- a/Samples/5_Domain_Specific/dxtc/dxtc_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/dxtc/dxtc_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/dxtc.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/dxtc/dxtc_vs2022.vcxproj b/Samples/5_Domain_Specific/dxtc/dxtc_vs2022.vcxproj
index bf3c5c25..8609b640 100644
--- a/Samples/5_Domain_Specific/dxtc/dxtc_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/dxtc/dxtc_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/dxtc.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/fastWalshTransform/Makefile b/Samples/5_Domain_Specific/fastWalshTransform/Makefile
index 1ba7282a..3cf3f547 100644
--- a/Samples/5_Domain_Specific/fastWalshTransform/Makefile
+++ b/Samples/5_Domain_Specific/fastWalshTransform/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/fastWalshTransform/NsightEclipse.xml b/Samples/5_Domain_Specific/fastWalshTransform/NsightEclipse.xml
index a88475e9..9e627352 100644
--- a/Samples/5_Domain_Specific/fastWalshTransform/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/fastWalshTransform/NsightEclipse.xml
@@ -3,11 +3,11 @@
 <entry>
   <name>fastWalshTransform</name>
   <cuda_api_list>
-    <toolkit>cudaMemset</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaFree</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaMemset</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
   </cuda_api_list>
   <description><![CDATA[Naturally(Hadamard)-ordered Fast Walsh Transform for batching vectors of arbitrary eligible lengths that are power of two in size.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -54,6 +54,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/5_Domain_Specific/fastWalshTransform/README.md b/Samples/5_Domain_Specific/fastWalshTransform/README.md
index 00dd99d2..473f4ce9 100644
--- a/Samples/5_Domain_Specific/fastWalshTransform/README.md
+++ b/Samples/5_Domain_Specific/fastWalshTransform/README.md
@@ -10,7 +10,7 @@ Linear Algebra, Data-Parallel Algorithms, Video Compression
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemset, cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaMemcpy
+cudaMemcpy, cudaFree, cudaDeviceSynchronize, cudaMemset, cudaMalloc
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/5_Domain_Specific/fastWalshTransform/fastWalshTransform_vs2017.vcxproj b/Samples/5_Domain_Specific/fastWalshTransform/fastWalshTransform_vs2017.vcxproj
index 6b2fb2f2..6ee445df 100644
--- a/Samples/5_Domain_Specific/fastWalshTransform/fastWalshTransform_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/fastWalshTransform/fastWalshTransform_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/fastWalshTransform.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/fastWalshTransform/fastWalshTransform_vs2019.vcxproj b/Samples/5_Domain_Specific/fastWalshTransform/fastWalshTransform_vs2019.vcxproj
index d6fdbf34..71cafa08 100644
--- a/Samples/5_Domain_Specific/fastWalshTransform/fastWalshTransform_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/fastWalshTransform/fastWalshTransform_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/fastWalshTransform.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/fastWalshTransform/fastWalshTransform_vs2022.vcxproj b/Samples/5_Domain_Specific/fastWalshTransform/fastWalshTransform_vs2022.vcxproj
index 12977578..8e30886b 100644
--- a/Samples/5_Domain_Specific/fastWalshTransform/fastWalshTransform_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/fastWalshTransform/fastWalshTransform_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/fastWalshTransform.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/fluidsD3D9/README.md b/Samples/5_Domain_Specific/fluidsD3D9/README.md
index 716a5aab..912936d8 100644
--- a/Samples/5_Domain_Specific/fluidsD3D9/README.md
+++ b/Samples/5_Domain_Specific/fluidsD3D9/README.md
@@ -10,7 +10,7 @@ Graphics Interop, CUFFT Library, Physically-Based Simulation
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaGraphicsMapResources, cudaGraphicsUnregisterResource, cudaFreeArray, cudaGraphicsResourceGetMappedPointer, cudaDestroyTextureObject, cudaMallocPitch, cudaCreateTextureObject, cudaMalloc, cudaMallocArray, cudaGetLastError, cudaGraphicsUnmapResources, cudaMemcpy, cudaGetDeviceProperties, cudaGetDevice
+cudaGraphicsUnmapResources, cudaMemcpy, cudaMallocArray, cudaFreeArray, cudaFree, cudaMallocPitch, cudaGraphicsResourceGetMappedPointer, cudaGetLastError, cudaGraphicsMapResources, cudaDestroyTextureObject, cudaGetDevice, cudaCreateTextureObject, cudaGraphicsUnregisterResource, cudaMalloc, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [DirectX](../../../README.md#directx)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/fluidsD3D9/fluidsD3D9_vs2017.vcxproj b/Samples/5_Domain_Specific/fluidsD3D9/fluidsD3D9_vs2017.vcxproj
index fe88c506..44221683 100644
--- a/Samples/5_Domain_Specific/fluidsD3D9/fluidsD3D9_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/fluidsD3D9/fluidsD3D9_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/fluidsD3D9.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -110,6 +110,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/fluidsD3D9/fluidsD3D9_vs2019.vcxproj b/Samples/5_Domain_Specific/fluidsD3D9/fluidsD3D9_vs2019.vcxproj
index c7cd7d37..3feb3a21 100644
--- a/Samples/5_Domain_Specific/fluidsD3D9/fluidsD3D9_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/fluidsD3D9/fluidsD3D9_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/fluidsD3D9.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -106,6 +106,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/fluidsD3D9/fluidsD3D9_vs2022.vcxproj b/Samples/5_Domain_Specific/fluidsD3D9/fluidsD3D9_vs2022.vcxproj
index 2ff8ad41..3dedaf00 100644
--- a/Samples/5_Domain_Specific/fluidsD3D9/fluidsD3D9_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/fluidsD3D9/fluidsD3D9_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/fluidsD3D9.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -106,6 +106,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/fluidsGL/Makefile b/Samples/5_Domain_Specific/fluidsGL/Makefile
index 4596e5ed..1c987228 100644
--- a/Samples/5_Domain_Specific/fluidsGL/Makefile
+++ b/Samples/5_Domain_Specific/fluidsGL/Makefile
@@ -299,9 +299,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/fluidsGL/NsightEclipse.xml b/Samples/5_Domain_Specific/fluidsGL/NsightEclipse.xml
index e2ebcaaa..96bb4ea0 100644
--- a/Samples/5_Domain_Specific/fluidsGL/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/fluidsGL/NsightEclipse.xml
@@ -3,19 +3,19 @@
 <entry>
   <name>fluidsGL</name>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaGraphicsMapResources</toolkit>
-    <toolkit>cudaFreeArray</toolkit>
-    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
-    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
-    <toolkit>cudaDestroyTextureObject</toolkit>
-    <toolkit>cudaMallocPitch</toolkit>
-    <toolkit>cudaCreateTextureObject</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaMallocArray</toolkit>
-    <toolkit>cudaGraphicsUnregisterResource</toolkit>
     <toolkit>cudaGraphicsUnmapResources</toolkit>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaMallocArray</toolkit>
+    <toolkit>cudaFreeArray</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaMallocPitch</toolkit>
+    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
+    <toolkit>cudaGraphicsMapResources</toolkit>
+    <toolkit>cudaDestroyTextureObject</toolkit>
+    <toolkit>cudaCreateTextureObject</toolkit>
+    <toolkit>cudaGraphicsUnregisterResource</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[An example of fluid simulation using CUDA and CUFFT, with OpenGL rendering.]]></description>
@@ -83,6 +83,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/5_Domain_Specific/fluidsGL/README.md b/Samples/5_Domain_Specific/fluidsGL/README.md
index ea572321..0d492ebc 100644
--- a/Samples/5_Domain_Specific/fluidsGL/README.md
+++ b/Samples/5_Domain_Specific/fluidsGL/README.md
@@ -10,7 +10,7 @@ Graphics Interop, CUFFT Library, Physically-Based Simulation
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaGraphicsMapResources, cudaFreeArray, cudaGraphicsGLRegisterBuffer, cudaGraphicsResourceGetMappedPointer, cudaDestroyTextureObject, cudaMallocPitch, cudaCreateTextureObject, cudaMalloc, cudaMallocArray, cudaGraphicsUnregisterResource, cudaGraphicsUnmapResources, cudaMemcpy, cudaGetDeviceProperties
+cudaGraphicsUnmapResources, cudaMemcpy, cudaMallocArray, cudaFreeArray, cudaFree, cudaMallocPitch, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDestroyTextureObject, cudaCreateTextureObject, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [X11](../../../README.md#x11), [GL](../../../README.md#gl), [CUFFT](../../../README.md#cufft)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/fluidsGL/findgllib.mk b/Samples/5_Domain_Specific/fluidsGL/findgllib.mk
index f0a5c551..998fcf0f 100644
--- a/Samples/5_Domain_Specific/fluidsGL/findgllib.mk
+++ b/Samples/5_Domain_Specific/fluidsGL/findgllib.mk
@@ -53,11 +53,12 @@ endif
 ifeq ("$(TARGET_OS)","linux")
     # $(info) >> findgllib.mk -> LINUX path <<<)
     # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside
-    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu      >/dev/null 2>&1; echo $$?)
-    FEDORA = $(shell echo $(DISTRO) | grep -i fedora      >/dev/null 2>&1; echo $$?)
-    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
-    CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
+    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu       >/dev/null 2>&1; echo $$?)
+    FEDORA = $(shell echo $(DISTRO) | grep -i fedora       >/dev/null 2>&1; echo $$?)
+    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel'  >/dev/null 2>&1; echo $$?)
+    CENTOS = $(shell echo $(DISTRO) | grep -i centos       >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux")
         DFLT_PATH ?= /usr/lib
       endif
     endif
+
     ifeq ("$(SUSE)","0")
       GLPATH    ?= /usr/X11R6/lib64
       GLLINK    ?= -L/usr/X11R6/lib64
       DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(FEDORA)","0")
+    else 
       GLPATH    ?= /usr/lib64/nvidia
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
-    ifeq ("$(RHEL)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(CENTOS)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-  
+      
   # find libGL, libGLU 
   GLLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so  -print 2>/dev/null)
   GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null)
diff --git a/Samples/5_Domain_Specific/fluidsGL/fluidsGL_vs2017.vcxproj b/Samples/5_Domain_Specific/fluidsGL/fluidsGL_vs2017.vcxproj
index c90c550e..8d2822ef 100644
--- a/Samples/5_Domain_Specific/fluidsGL/fluidsGL_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/fluidsGL/fluidsGL_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/fluidsGL.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -120,6 +120,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/fluidsGL/fluidsGL_vs2019.vcxproj b/Samples/5_Domain_Specific/fluidsGL/fluidsGL_vs2019.vcxproj
index 1e2ee21e..aa2839e7 100644
--- a/Samples/5_Domain_Specific/fluidsGL/fluidsGL_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/fluidsGL/fluidsGL_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/fluidsGL.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -116,6 +116,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/fluidsGL/fluidsGL_vs2022.vcxproj b/Samples/5_Domain_Specific/fluidsGL/fluidsGL_vs2022.vcxproj
index 8e7553c7..eeae6a67 100644
--- a/Samples/5_Domain_Specific/fluidsGL/fluidsGL_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/fluidsGL/fluidsGL_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/fluidsGL.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -116,6 +116,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/fluidsGLES/Makefile b/Samples/5_Domain_Specific/fluidsGLES/Makefile
index 269debb0..00b514cf 100644
--- a/Samples/5_Domain_Specific/fluidsGLES/Makefile
+++ b/Samples/5_Domain_Specific/fluidsGLES/Makefile
@@ -313,9 +313,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/fluidsGLES/NsightEclipse.xml b/Samples/5_Domain_Specific/fluidsGLES/NsightEclipse.xml
index e5feea24..afaeff7e 100644
--- a/Samples/5_Domain_Specific/fluidsGLES/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/fluidsGLES/NsightEclipse.xml
@@ -3,19 +3,19 @@
 <entry>
   <name>fluidsGLES</name>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaGraphicsMapResources</toolkit>
-    <toolkit>cudaFreeArray</toolkit>
-    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
-    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
-    <toolkit>cudaDestroyTextureObject</toolkit>
-    <toolkit>cudaMallocPitch</toolkit>
-    <toolkit>cudaCreateTextureObject</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaMallocArray</toolkit>
-    <toolkit>cudaGraphicsUnregisterResource</toolkit>
     <toolkit>cudaGraphicsUnmapResources</toolkit>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaMallocArray</toolkit>
+    <toolkit>cudaFreeArray</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaMallocPitch</toolkit>
+    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
+    <toolkit>cudaGraphicsMapResources</toolkit>
+    <toolkit>cudaDestroyTextureObject</toolkit>
+    <toolkit>cudaCreateTextureObject</toolkit>
+    <toolkit>cudaGraphicsUnregisterResource</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[An example of fluid simulation using CUDA and CUFFT, with OpenGLES rendering.]]></description>
@@ -74,6 +74,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>arm</arch>
diff --git a/Samples/5_Domain_Specific/fluidsGLES/README.md b/Samples/5_Domain_Specific/fluidsGLES/README.md
index 406eb1bc..b2432dd0 100644
--- a/Samples/5_Domain_Specific/fluidsGLES/README.md
+++ b/Samples/5_Domain_Specific/fluidsGLES/README.md
@@ -10,7 +10,7 @@ Graphics Interop, CUFFT Library, Physically-Based Simulation
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaGraphicsMapResources, cudaFreeArray, cudaGraphicsGLRegisterBuffer, cudaGraphicsResourceGetMappedPointer, cudaDestroyTextureObject, cudaMallocPitch, cudaCreateTextureObject, cudaMalloc, cudaMallocArray, cudaGraphicsUnregisterResource, cudaGraphicsUnmapResources, cudaMemcpy, cudaGetDeviceProperties
+cudaGraphicsUnmapResources, cudaMemcpy, cudaMallocArray, cudaFreeArray, cudaFree, cudaMallocPitch, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDestroyTextureObject, cudaCreateTextureObject, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [X11](../../../README.md#x11), [GLES](../../../README.md#gles), [CUFFT](../../../README.md#cufft)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/fluidsGLES/findgleslib.mk b/Samples/5_Domain_Specific/fluidsGLES/findgleslib.mk
index bcb335c1..6da2f078 100644
--- a/Samples/5_Domain_Specific/fluidsGLES/findgleslib.mk
+++ b/Samples/5_Domain_Specific/fluidsGLES/findgleslib.mk
@@ -60,6 +60,7 @@ ifeq ("$(TARGET_OS)","linux")
     RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
     CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -86,27 +87,17 @@ ifeq ("$(TARGET_OS)","linux")
         DFLT_PATH ?= /usr/lib
       endif
     endif
+
     ifeq ("$(SUSE)","0")
       GLPATH    ?= /usr/X11R6/lib64
       GLLINK    ?= -L/usr/X11R6/lib64
       DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(FEDORA)","0")
+    else 
       GLPATH    ?= /usr/lib64/nvidia
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
-    ifeq ("$(RHEL)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(CENTOS)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-  
+
   # find libGL, libGLU, libXi, 
   EGLLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libEGL.so    -print 2>/dev/null)
   GLESLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLESv2.so -print 2>/dev/null)
diff --git a/Samples/5_Domain_Specific/marchingCubes/Makefile b/Samples/5_Domain_Specific/marchingCubes/Makefile
index 91180207..baf4f0f0 100644
--- a/Samples/5_Domain_Specific/marchingCubes/Makefile
+++ b/Samples/5_Domain_Specific/marchingCubes/Makefile
@@ -324,9 +324,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/marchingCubes/NsightEclipse.xml b/Samples/5_Domain_Specific/marchingCubes/NsightEclipse.xml
index ea03311e..0c23d020 100644
--- a/Samples/5_Domain_Specific/marchingCubes/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/marchingCubes/NsightEclipse.xml
@@ -6,21 +6,21 @@
     <flag>--std=c++14</flag>
   </cflags>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaGraphicsMapResources</toolkit>
-    <toolkit>cudaGraphicsUnregisterResource</toolkit>
-    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
-    <toolkit>cudaGLUnregisterBufferObject</toolkit>
-    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
     <toolkit>cudaGLUnmapBufferObject</toolkit>
-    <toolkit>cudaDestroyTextureObject</toolkit>
-    <toolkit>cudaGLMapBufferObject</toolkit>
-    <toolkit>cudaCreateTextureObject</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaCreateChannelDesc</toolkit>
-    <toolkit>cudaGLRegisterBufferObject</toolkit>
     <toolkit>cudaGraphicsUnmapResources</toolkit>
+    <toolkit>cudaCreateChannelDesc</toolkit>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaGLMapBufferObject</toolkit>
+    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
+    <toolkit>cudaGraphicsMapResources</toolkit>
+    <toolkit>cudaDestroyTextureObject</toolkit>
+    <toolkit>cudaGLUnregisterBufferObject</toolkit>
+    <toolkit>cudaCreateTextureObject</toolkit>
+    <toolkit>cudaGLRegisterBufferObject</toolkit>
+    <toolkit>cudaGraphicsUnregisterResource</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample extracts a geometric isosurface from a volume dataset using the marching cubes algorithm. It uses the scan (prefix sum) function from the Thrust library to perform stream compaction.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -92,6 +92,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/5_Domain_Specific/marchingCubes/README.md b/Samples/5_Domain_Specific/marchingCubes/README.md
index 683f4f6b..0c2ed9e1 100644
--- a/Samples/5_Domain_Specific/marchingCubes/README.md
+++ b/Samples/5_Domain_Specific/marchingCubes/README.md
@@ -10,7 +10,7 @@ OpenGL Graphics Interop, Vertex Buffers, 3D Graphics, Physically Based Simulatio
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaGraphicsMapResources, cudaGraphicsUnregisterResource, cudaGraphicsGLRegisterBuffer, cudaGLUnregisterBufferObject, cudaGraphicsResourceGetMappedPointer, cudaGLUnmapBufferObject, cudaDestroyTextureObject, cudaGLMapBufferObject, cudaCreateTextureObject, cudaMalloc, cudaCreateChannelDesc, cudaGLRegisterBufferObject, cudaGraphicsUnmapResources, cudaMemcpy
+cudaGLUnmapBufferObject, cudaGraphicsUnmapResources, cudaCreateChannelDesc, cudaMemcpy, cudaFree, cudaGLMapBufferObject, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDestroyTextureObject, cudaGLUnregisterBufferObject, cudaCreateTextureObject, cudaGLRegisterBufferObject, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer
 
 ## Dependencies needed to build/run
 [X11](../../../README.md#x11), [GL](../../../README.md#gl)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/marchingCubes/findgllib.mk b/Samples/5_Domain_Specific/marchingCubes/findgllib.mk
index f0a5c551..998fcf0f 100644
--- a/Samples/5_Domain_Specific/marchingCubes/findgllib.mk
+++ b/Samples/5_Domain_Specific/marchingCubes/findgllib.mk
@@ -53,11 +53,12 @@ endif
 ifeq ("$(TARGET_OS)","linux")
     # $(info) >> findgllib.mk -> LINUX path <<<)
     # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside
-    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu      >/dev/null 2>&1; echo $$?)
-    FEDORA = $(shell echo $(DISTRO) | grep -i fedora      >/dev/null 2>&1; echo $$?)
-    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
-    CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
+    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu       >/dev/null 2>&1; echo $$?)
+    FEDORA = $(shell echo $(DISTRO) | grep -i fedora       >/dev/null 2>&1; echo $$?)
+    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel'  >/dev/null 2>&1; echo $$?)
+    CENTOS = $(shell echo $(DISTRO) | grep -i centos       >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux")
         DFLT_PATH ?= /usr/lib
       endif
     endif
+
     ifeq ("$(SUSE)","0")
       GLPATH    ?= /usr/X11R6/lib64
       GLLINK    ?= -L/usr/X11R6/lib64
       DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(FEDORA)","0")
+    else 
       GLPATH    ?= /usr/lib64/nvidia
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
-    ifeq ("$(RHEL)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(CENTOS)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-  
+      
   # find libGL, libGLU 
   GLLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so  -print 2>/dev/null)
   GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null)
diff --git a/Samples/5_Domain_Specific/marchingCubes/marchingCubes_vs2017.vcxproj b/Samples/5_Domain_Specific/marchingCubes/marchingCubes_vs2017.vcxproj
index 9eb7601b..9d5e9d28 100644
--- a/Samples/5_Domain_Specific/marchingCubes/marchingCubes_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/marchingCubes/marchingCubes_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/marchingCubes.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -119,6 +119,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/marchingCubes/marchingCubes_vs2019.vcxproj b/Samples/5_Domain_Specific/marchingCubes/marchingCubes_vs2019.vcxproj
index e2edca52..9e370d0b 100644
--- a/Samples/5_Domain_Specific/marchingCubes/marchingCubes_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/marchingCubes/marchingCubes_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/marchingCubes.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -115,6 +115,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/marchingCubes/marchingCubes_vs2022.vcxproj b/Samples/5_Domain_Specific/marchingCubes/marchingCubes_vs2022.vcxproj
index 284e00cb..ef1da880 100644
--- a/Samples/5_Domain_Specific/marchingCubes/marchingCubes_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/marchingCubes/marchingCubes_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/marchingCubes.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -115,6 +115,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/nbody/Makefile b/Samples/5_Domain_Specific/nbody/Makefile
index d1d2e614..f4e1df42 100644
--- a/Samples/5_Domain_Specific/nbody/Makefile
+++ b/Samples/5_Domain_Specific/nbody/Makefile
@@ -299,9 +299,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/nbody/NsightEclipse.xml b/Samples/5_Domain_Specific/nbody/NsightEclipse.xml
index 293adc58..213de221 100644
--- a/Samples/5_Domain_Specific/nbody/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/nbody/NsightEclipse.xml
@@ -6,24 +6,24 @@
     <flag>-ftz=true</flag>
   </cflags>
   <cuda_api_list>
-    <toolkit>cudaMemcpyToSymbol</toolkit>
-    <toolkit>cudaGraphicsMapResources</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
-    <toolkit>cudaStreamQuery</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
-    <toolkit>cudaGetDeviceCount</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaEventSynchronize</toolkit>
-    <toolkit>cudaGraphicsResourceSetMapFlags</toolkit>
-    <toolkit>cudaSetDeviceFlags</toolkit>
-    <toolkit>cudaEventDestroy</toolkit>
-    <toolkit>cudaDeviceCanAccessPeer</toolkit>
-    <toolkit>cudaSetDevice</toolkit>
     <toolkit>cudaGraphicsUnmapResources</toolkit>
+    <toolkit>cudaSetDeviceFlags</toolkit>
+    <toolkit>cudaGraphicsResourceSetMapFlags</toolkit>
+    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
+    <toolkit>cudaGraphicsMapResources</toolkit>
+    <toolkit>cudaSetDevice</toolkit>
+    <toolkit>cudaEventSynchronize</toolkit>
+    <toolkit>cudaGetDeviceCount</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
     <toolkit>cudaGetDevice</toolkit>
+    <toolkit>cudaMemcpyToSymbol</toolkit>
+    <toolkit>cudaStreamQuery</toolkit>
+    <toolkit>cudaEventDestroy</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
+    <toolkit>cudaDeviceCanAccessPeer</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates efficient all-pairs simulation of a gravitational n-body simulation in CUDA.  This sample accompanies the GPU Gems 3 chapter "Fast N-Body Simulation with CUDA".  With CUDA 5.5, performance on Tesla K20c has increased to over 1.8TFLOP/s single precision.  Double Performance has also improved on all Kepler and Fermi GPU architectures as well.  Starting in CUDA 4.0, the nBody sample has been updated to take advantage of new features to easily scale the n-body simulation across multiple GPUs in a single PC.  Adding "-numbodies=<bodies>" to the command line will allow users to set # of bodies for simulation.  Adding “-numdevices=<N>” to the command line option will cause the sample to use N devices (if available) for simulation.  In this mode, the position and velocity data for all bodies are read from system memory using “zero copy” rather than from device memory.  For a small number of devices (4 or fewer) and a large enough number of bodies, bandwidth is not a bottleneck so we can achieve strong scaling across these devices.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -91,6 +91,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/5_Domain_Specific/nbody/README.md b/Samples/5_Domain_Specific/nbody/README.md
index 40d122a7..837296c7 100644
--- a/Samples/5_Domain_Specific/nbody/README.md
+++ b/Samples/5_Domain_Specific/nbody/README.md
@@ -10,7 +10,7 @@ Graphics Interop, Data Parallel Algorithms, Physically-Based Simulation
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemcpyToSymbol, cudaGraphicsMapResources, cudaEventRecord, cudaStreamQuery, cudaEventCreate, cudaGraphicsResourceGetMappedPointer, cudaGetDeviceCount, cudaEventElapsedTime, cudaDeviceSynchronize, cudaEventSynchronize, cudaGraphicsResourceSetMapFlags, cudaSetDeviceFlags, cudaEventDestroy, cudaDeviceCanAccessPeer, cudaSetDevice, cudaGraphicsUnmapResources, cudaGetDeviceProperties, cudaGetDevice
+cudaGraphicsUnmapResources, cudaSetDeviceFlags, cudaGraphicsResourceSetMapFlags, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaSetDevice, cudaEventSynchronize, cudaGetDeviceCount, cudaGetDeviceProperties, cudaDeviceSynchronize, cudaEventRecord, cudaGetDevice, cudaMemcpyToSymbol, cudaStreamQuery, cudaEventDestroy, cudaEventElapsedTime, cudaDeviceCanAccessPeer, cudaEventCreate
 
 ## Dependencies needed to build/run
 [X11](../../../README.md#x11), [GL](../../../README.md#gl)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/nbody/findgllib.mk b/Samples/5_Domain_Specific/nbody/findgllib.mk
index f0a5c551..998fcf0f 100644
--- a/Samples/5_Domain_Specific/nbody/findgllib.mk
+++ b/Samples/5_Domain_Specific/nbody/findgllib.mk
@@ -53,11 +53,12 @@ endif
 ifeq ("$(TARGET_OS)","linux")
     # $(info) >> findgllib.mk -> LINUX path <<<)
     # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside
-    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu      >/dev/null 2>&1; echo $$?)
-    FEDORA = $(shell echo $(DISTRO) | grep -i fedora      >/dev/null 2>&1; echo $$?)
-    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
-    CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
+    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu       >/dev/null 2>&1; echo $$?)
+    FEDORA = $(shell echo $(DISTRO) | grep -i fedora       >/dev/null 2>&1; echo $$?)
+    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel'  >/dev/null 2>&1; echo $$?)
+    CENTOS = $(shell echo $(DISTRO) | grep -i centos       >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux")
         DFLT_PATH ?= /usr/lib
       endif
     endif
+
     ifeq ("$(SUSE)","0")
       GLPATH    ?= /usr/X11R6/lib64
       GLLINK    ?= -L/usr/X11R6/lib64
       DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(FEDORA)","0")
+    else 
       GLPATH    ?= /usr/lib64/nvidia
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
-    ifeq ("$(RHEL)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(CENTOS)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-  
+      
   # find libGL, libGLU 
   GLLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so  -print 2>/dev/null)
   GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null)
diff --git a/Samples/5_Domain_Specific/nbody/nbody_vs2017.vcxproj b/Samples/5_Domain_Specific/nbody/nbody_vs2017.vcxproj
index 1406b31b..99e5a6bf 100644
--- a/Samples/5_Domain_Specific/nbody/nbody_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/nbody/nbody_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/nbody.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -125,6 +125,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/nbody/nbody_vs2019.vcxproj b/Samples/5_Domain_Specific/nbody/nbody_vs2019.vcxproj
index ae04b090..7662e500 100644
--- a/Samples/5_Domain_Specific/nbody/nbody_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/nbody/nbody_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/nbody.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -121,6 +121,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/nbody/nbody_vs2022.vcxproj b/Samples/5_Domain_Specific/nbody/nbody_vs2022.vcxproj
index c1495525..1c2c9ecf 100644
--- a/Samples/5_Domain_Specific/nbody/nbody_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/nbody/nbody_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/nbody.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -121,6 +121,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/nbody_opengles/Makefile b/Samples/5_Domain_Specific/nbody_opengles/Makefile
index 5b56f105..ef0b753b 100644
--- a/Samples/5_Domain_Specific/nbody_opengles/Makefile
+++ b/Samples/5_Domain_Specific/nbody_opengles/Makefile
@@ -313,9 +313,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/nbody_opengles/NsightEclipse.xml b/Samples/5_Domain_Specific/nbody_opengles/NsightEclipse.xml
index e7e56c9a..4ffba110 100644
--- a/Samples/5_Domain_Specific/nbody_opengles/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/nbody_opengles/NsightEclipse.xml
@@ -6,23 +6,23 @@
     <flag>-ftz=true</flag>
   </cflags>
   <cuda_api_list>
-    <toolkit>cudaMemcpyToSymbol</toolkit>
-    <toolkit>cudaGraphicsMapResources</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
-    <toolkit>cudaStreamQuery</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
-    <toolkit>cudaGetDeviceCount</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaEventSynchronize</toolkit>
-    <toolkit>cudaGraphicsResourceSetMapFlags</toolkit>
-    <toolkit>cudaSetDeviceFlags</toolkit>
-    <toolkit>cudaEventDestroy</toolkit>
-    <toolkit>cudaSetDevice</toolkit>
     <toolkit>cudaGraphicsUnmapResources</toolkit>
+    <toolkit>cudaSetDeviceFlags</toolkit>
+    <toolkit>cudaGraphicsResourceSetMapFlags</toolkit>
+    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
+    <toolkit>cudaGraphicsMapResources</toolkit>
+    <toolkit>cudaSetDevice</toolkit>
+    <toolkit>cudaEventSynchronize</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
     <toolkit>cudaGetDevice</toolkit>
+    <toolkit>cudaMemcpyToSymbol</toolkit>
+    <toolkit>cudaStreamQuery</toolkit>
+    <toolkit>cudaEventDestroy</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
+    <toolkit>cudaGetDeviceCount</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates efficient all-pairs simulation of a gravitational n-body simulation in CUDA. Unlike the OpenGL nbody sample, there is no user interaction.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -81,6 +81,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>arm</arch>
diff --git a/Samples/5_Domain_Specific/nbody_opengles/README.md b/Samples/5_Domain_Specific/nbody_opengles/README.md
index 3c4772d2..a7911e09 100644
--- a/Samples/5_Domain_Specific/nbody_opengles/README.md
+++ b/Samples/5_Domain_Specific/nbody_opengles/README.md
@@ -10,7 +10,7 @@ Graphics Interop, Data Parallel Algorithms, Physically-Based Simulation
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemcpyToSymbol, cudaGraphicsMapResources, cudaEventRecord, cudaStreamQuery, cudaEventCreate, cudaGraphicsResourceGetMappedPointer, cudaGetDeviceCount, cudaEventElapsedTime, cudaDeviceSynchronize, cudaEventSynchronize, cudaGraphicsResourceSetMapFlags, cudaSetDeviceFlags, cudaEventDestroy, cudaSetDevice, cudaGraphicsUnmapResources, cudaGetDeviceProperties, cudaGetDevice
+cudaGraphicsUnmapResources, cudaSetDeviceFlags, cudaGraphicsResourceSetMapFlags, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaSetDevice, cudaEventSynchronize, cudaGetDeviceProperties, cudaDeviceSynchronize, cudaEventRecord, cudaGetDevice, cudaMemcpyToSymbol, cudaStreamQuery, cudaEventDestroy, cudaEventElapsedTime, cudaGetDeviceCount, cudaEventCreate
 
 ## Dependencies needed to build/run
 [X11](../../../README.md#x11), [GLES](../../../README.md#gles)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/nbody_opengles/findgleslib.mk b/Samples/5_Domain_Specific/nbody_opengles/findgleslib.mk
index bcb335c1..6da2f078 100644
--- a/Samples/5_Domain_Specific/nbody_opengles/findgleslib.mk
+++ b/Samples/5_Domain_Specific/nbody_opengles/findgleslib.mk
@@ -60,6 +60,7 @@ ifeq ("$(TARGET_OS)","linux")
     RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
     CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -86,27 +87,17 @@ ifeq ("$(TARGET_OS)","linux")
         DFLT_PATH ?= /usr/lib
       endif
     endif
-    ifeq ("$(SUSE)","0")
+
+    ifeq ("$(SUSE)","0")
       GLPATH    ?= /usr/X11R6/lib64
       GLLINK    ?= -L/usr/X11R6/lib64
       DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(FEDORA)","0")
+    else
       GLPATH    ?= /usr/lib64/nvidia
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
-    ifeq ("$(RHEL)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(CENTOS)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-  
+
   # find libGL, libGLU, libXi, 
   EGLLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libEGL.so    -print 2>/dev/null)
   GLESLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLESv2.so -print 2>/dev/null)
diff --git a/Samples/5_Domain_Specific/nbody_screen/Makefile b/Samples/5_Domain_Specific/nbody_screen/Makefile
index 60558f49..b54e1795 100644
--- a/Samples/5_Domain_Specific/nbody_screen/Makefile
+++ b/Samples/5_Domain_Specific/nbody_screen/Makefile
@@ -320,9 +320,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/nbody_screen/NsightEclipse.xml b/Samples/5_Domain_Specific/nbody_screen/NsightEclipse.xml
index 1c3275ce..074a2e5b 100644
--- a/Samples/5_Domain_Specific/nbody_screen/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/nbody_screen/NsightEclipse.xml
@@ -6,23 +6,23 @@
     <flag>-ftz=true</flag>
   </cflags>
   <cuda_api_list>
-    <toolkit>cudaMemcpyToSymbol</toolkit>
-    <toolkit>cudaGraphicsMapResources</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
-    <toolkit>cudaStreamQuery</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
-    <toolkit>cudaGetDeviceCount</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaEventSynchronize</toolkit>
-    <toolkit>cudaGraphicsResourceSetMapFlags</toolkit>
-    <toolkit>cudaSetDeviceFlags</toolkit>
-    <toolkit>cudaEventDestroy</toolkit>
-    <toolkit>cudaSetDevice</toolkit>
     <toolkit>cudaGraphicsUnmapResources</toolkit>
+    <toolkit>cudaSetDeviceFlags</toolkit>
+    <toolkit>cudaGraphicsResourceSetMapFlags</toolkit>
+    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
+    <toolkit>cudaGraphicsMapResources</toolkit>
+    <toolkit>cudaSetDevice</toolkit>
+    <toolkit>cudaEventSynchronize</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
     <toolkit>cudaGetDevice</toolkit>
+    <toolkit>cudaMemcpyToSymbol</toolkit>
+    <toolkit>cudaStreamQuery</toolkit>
+    <toolkit>cudaEventDestroy</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
+    <toolkit>cudaGetDeviceCount</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates efficient all-pairs simulation of a gravitational n-body simulation in CUDA. Unlike the OpenGL nbody sample, there is no user interaction.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -79,6 +79,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <platform>qnx</platform>
diff --git a/Samples/5_Domain_Specific/nbody_screen/README.md b/Samples/5_Domain_Specific/nbody_screen/README.md
index 73787bdd..54b9df1c 100644
--- a/Samples/5_Domain_Specific/nbody_screen/README.md
+++ b/Samples/5_Domain_Specific/nbody_screen/README.md
@@ -10,7 +10,7 @@ Graphics Interop, Data Parallel Algorithms, Physically-Based Simulation
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ aarch64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemcpyToSymbol, cudaGraphicsMapResources, cudaEventRecord, cudaStreamQuery, cudaEventCreate, cudaGraphicsResourceGetMappedPointer, cudaGetDeviceCount, cudaEventElapsedTime, cudaDeviceSynchronize, cudaEventSynchronize, cudaGraphicsResourceSetMapFlags, cudaSetDeviceFlags, cudaEventDestroy, cudaSetDevice, cudaGraphicsUnmapResources, cudaGetDeviceProperties, cudaGetDevice
+cudaGraphicsUnmapResources, cudaSetDeviceFlags, cudaGraphicsResourceSetMapFlags, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaSetDevice, cudaEventSynchronize, cudaGetDeviceProperties, cudaDeviceSynchronize, cudaEventRecord, cudaGetDevice, cudaMemcpyToSymbol, cudaStreamQuery, cudaEventDestroy, cudaEventElapsedTime, cudaGetDeviceCount, cudaEventCreate
 
 ## Dependencies needed to build/run
 [screen](../../../README.md#screen), [GLES](../../../README.md#gles)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/nbody_screen/findgleslib.mk b/Samples/5_Domain_Specific/nbody_screen/findgleslib.mk
index bcb335c1..6da2f078 100644
--- a/Samples/5_Domain_Specific/nbody_screen/findgleslib.mk
+++ b/Samples/5_Domain_Specific/nbody_screen/findgleslib.mk
@@ -60,6 +60,7 @@ ifeq ("$(TARGET_OS)","linux")
     RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
     CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -86,27 +87,17 @@ ifeq ("$(TARGET_OS)","linux")
         DFLT_PATH ?= /usr/lib
       endif
     endif
-    ifeq ("$(SUSE)","0")
+
+    ifeq ("$(SUSE)","0")
       GLPATH    ?= /usr/X11R6/lib64
       GLLINK    ?= -L/usr/X11R6/lib64
       DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(FEDORA)","0")
+    else 
       GLPATH    ?= /usr/lib64/nvidia
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
-    ifeq ("$(RHEL)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(CENTOS)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-  
+	  
   # find libGL, libGLU, libXi, 
   EGLLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libEGL.so    -print 2>/dev/null)
   GLESLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLESv2.so -print 2>/dev/null)
diff --git a/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/Makefile b/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/Makefile
index 6933444e..37afba15 100644
--- a/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/Makefile
+++ b/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/NsightEclipse.xml b/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/NsightEclipse.xml
index 1fc38a6c..57679e4c 100644
--- a/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/NsightEclipse.xml
@@ -3,30 +3,30 @@
 <entry>
   <name>p2pBandwidthLatencyTest</name>
   <cuda_api_list>
-    <toolkit>cudaDeviceEnablePeerAccess</toolkit>
-    <toolkit>cudaOccupancyMaxPotentialBlockSize</toolkit>
-    <toolkit>cudaStreamCreateWithFlags</toolkit>
-    <toolkit>cudaDeviceCanAccessPeer</toolkit>
-    <toolkit>cudaStreamDestroy</toolkit>
-    <toolkit>cudaHostAlloc</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaEventDestroy</toolkit>
     <toolkit>cudaSetDevice</toolkit>
-    <toolkit>cudaMemcpyPeerAsync</toolkit>
-    <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaEventDestroy</toolkit>
+    <toolkit>cudaOccupancyMaxPotentialBlockSize</toolkit>
     <toolkit>cudaCheckError</toolkit>
-    <toolkit>cudaGetDeviceCount</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
-    <toolkit>cudaGetLastError</toolkit>
-    <toolkit>cudaDeviceDisablePeerAccess</toolkit>
-    <toolkit>cudaStreamSynchronize</toolkit>
-    <toolkit>cudaGetErrorString</toolkit>
-    <toolkit>cudaStreamWaitEvent</toolkit>
-    <toolkit>cudaMemset</toolkit>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
     <toolkit>cudaFreeHost</toolkit>
+    <toolkit>cudaGetDeviceCount</toolkit>
+    <toolkit>cudaDeviceCanAccessPeer</toolkit>
+    <toolkit>cudaStreamCreateWithFlags</toolkit>
+    <toolkit>cudaStreamDestroy</toolkit>
+    <toolkit>cudaGetLastError</toolkit>
+    <toolkit>cudaMemset</toolkit>
+    <toolkit>cudaStreamWaitEvent</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
+    <toolkit>cudaHostAlloc</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaGetErrorString</toolkit>
+    <toolkit>cudaMemcpyPeerAsync</toolkit>
+    <toolkit>cudaDeviceDisablePeerAccess</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
+    <toolkit>cudaStreamSynchronize</toolkit>
+    <toolkit>cudaDeviceEnablePeerAccess</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[This application demonstrates the CUDA Peer-To-Peer (P2P) data transfers between pairs of GPUs and computes latency and bandwidth.  Tests on GPU pairs using P2P and without P2P are tested.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -71,6 +71,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/README.md b/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/README.md
index 72e34fb1..1df07a63 100644
--- a/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/README.md
+++ b/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/README.md
@@ -10,7 +10,7 @@ Performance Strategies, Asynchronous Data Transfers, Unified Virtual Address Spa
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaDeviceEnablePeerAccess, cudaOccupancyMaxPotentialBlockSize, cudaStreamCreateWithFlags, cudaDeviceCanAccessPeer, cudaStreamDestroy, cudaHostAlloc, cudaEventCreate, cudaMalloc, cudaEventDestroy, cudaSetDevice, cudaMemcpyPeerAsync, cudaGetDeviceProperties, cudaCheckError, cudaGetDeviceCount, cudaEventElapsedTime, cudaGetLastError, cudaDeviceDisablePeerAccess, cudaStreamSynchronize, cudaGetErrorString, cudaStreamWaitEvent, cudaMemset, cudaFree, cudaEventRecord, cudaFreeHost
+cudaSetDevice, cudaEventDestroy, cudaOccupancyMaxPotentialBlockSize, cudaCheckError, cudaFreeHost, cudaGetDeviceCount, cudaDeviceCanAccessPeer, cudaStreamCreateWithFlags, cudaStreamDestroy, cudaGetLastError, cudaMemset, cudaStreamWaitEvent, cudaEventElapsedTime, cudaEventCreate, cudaHostAlloc, cudaFree, cudaGetErrorString, cudaMemcpyPeerAsync, cudaDeviceDisablePeerAccess, cudaEventRecord, cudaStreamSynchronize, cudaDeviceEnablePeerAccess, cudaMalloc, cudaGetDeviceProperties
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/p2pBandwidthLatencyTest_vs2017.vcxproj b/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/p2pBandwidthLatencyTest_vs2017.vcxproj
index 545fa82c..cbed6fc1 100644
--- a/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/p2pBandwidthLatencyTest_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/p2pBandwidthLatencyTest_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/p2pBandwidthLatencyTest.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/p2pBandwidthLatencyTest_vs2019.vcxproj b/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/p2pBandwidthLatencyTest_vs2019.vcxproj
index f8e09274..43fbfc44 100644
--- a/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/p2pBandwidthLatencyTest_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/p2pBandwidthLatencyTest_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/p2pBandwidthLatencyTest.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/p2pBandwidthLatencyTest_vs2022.vcxproj b/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/p2pBandwidthLatencyTest_vs2022.vcxproj
index 6e35634d..28486552 100644
--- a/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/p2pBandwidthLatencyTest_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/p2pBandwidthLatencyTest_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/p2pBandwidthLatencyTest.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/postProcessGL/Makefile b/Samples/5_Domain_Specific/postProcessGL/Makefile
index 03790fc8..c6f18f8d 100644
--- a/Samples/5_Domain_Specific/postProcessGL/Makefile
+++ b/Samples/5_Domain_Specific/postProcessGL/Makefile
@@ -299,9 +299,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/postProcessGL/NsightEclipse.xml b/Samples/5_Domain_Specific/postProcessGL/NsightEclipse.xml
index c477aeef..756864f0 100644
--- a/Samples/5_Domain_Specific/postProcessGL/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/postProcessGL/NsightEclipse.xml
@@ -3,22 +3,22 @@
 <entry>
   <name>postProcessGL</name>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaGraphicsMapResources</toolkit>
-    <toolkit>cudaGraphicsUnregisterResource</toolkit>
-    <toolkit>cudaMemcpyToArray</toolkit>
-    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
     <toolkit>cudaHostAlloc</toolkit>
-    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
-    <toolkit>cudaProcess</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaDestroyTextureObject</toolkit>
-    <toolkit>cudaCreateTextureObject</toolkit>
-    <toolkit>cudaGraphicsSubResourceGetMappedArray</toolkit>
-    <toolkit>cudaGetChannelDesc</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaGraphicsGLRegisterImage</toolkit>
     <toolkit>cudaGraphicsUnmapResources</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaGetChannelDesc</toolkit>
+    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
+    <toolkit>cudaGraphicsMapResources</toolkit>
+    <toolkit>cudaDestroyTextureObject</toolkit>
+    <toolkit>cudaMemcpyToArray</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaCreateTextureObject</toolkit>
+    <toolkit>cudaProcess</toolkit>
+    <toolkit>cudaGraphicsUnregisterResource</toolkit>
+    <toolkit>cudaGraphicsSubResourceGetMappedArray</toolkit>
+    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
+    <toolkit>cudaGraphicsGLRegisterImage</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample shows how to post-process an image rendered in OpenGL using CUDA.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -85,6 +85,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/5_Domain_Specific/postProcessGL/README.md b/Samples/5_Domain_Specific/postProcessGL/README.md
index 53aa76e7..821e00ff 100644
--- a/Samples/5_Domain_Specific/postProcessGL/README.md
+++ b/Samples/5_Domain_Specific/postProcessGL/README.md
@@ -10,7 +10,7 @@ Graphics Interop, Image Processing
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaGraphicsMapResources, cudaGraphicsUnregisterResource, cudaMemcpyToArray, cudaGraphicsGLRegisterBuffer, cudaHostAlloc, cudaGraphicsResourceGetMappedPointer, cudaProcess, cudaDeviceSynchronize, cudaDestroyTextureObject, cudaCreateTextureObject, cudaGraphicsSubResourceGetMappedArray, cudaGetChannelDesc, cudaMalloc, cudaGraphicsGLRegisterImage, cudaGraphicsUnmapResources
+cudaHostAlloc, cudaGraphicsUnmapResources, cudaMalloc, cudaFree, cudaGetChannelDesc, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDestroyTextureObject, cudaMemcpyToArray, cudaDeviceSynchronize, cudaCreateTextureObject, cudaProcess, cudaGraphicsUnregisterResource, cudaGraphicsSubResourceGetMappedArray, cudaGraphicsGLRegisterBuffer, cudaGraphicsGLRegisterImage
 
 ## Dependencies needed to build/run
 [X11](../../../README.md#x11), [GL](../../../README.md#gl)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/postProcessGL/findgllib.mk b/Samples/5_Domain_Specific/postProcessGL/findgllib.mk
index f0a5c551..998fcf0f 100644
--- a/Samples/5_Domain_Specific/postProcessGL/findgllib.mk
+++ b/Samples/5_Domain_Specific/postProcessGL/findgllib.mk
@@ -53,11 +53,12 @@ endif
 ifeq ("$(TARGET_OS)","linux")
     # $(info) >> findgllib.mk -> LINUX path <<<)
     # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside
-    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu      >/dev/null 2>&1; echo $$?)
-    FEDORA = $(shell echo $(DISTRO) | grep -i fedora      >/dev/null 2>&1; echo $$?)
-    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
-    CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
+    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu       >/dev/null 2>&1; echo $$?)
+    FEDORA = $(shell echo $(DISTRO) | grep -i fedora       >/dev/null 2>&1; echo $$?)
+    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel'  >/dev/null 2>&1; echo $$?)
+    CENTOS = $(shell echo $(DISTRO) | grep -i centos       >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux")
         DFLT_PATH ?= /usr/lib
       endif
     endif
+
     ifeq ("$(SUSE)","0")
       GLPATH    ?= /usr/X11R6/lib64
       GLLINK    ?= -L/usr/X11R6/lib64
       DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(FEDORA)","0")
+    else 
       GLPATH    ?= /usr/lib64/nvidia
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
-    ifeq ("$(RHEL)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(CENTOS)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-  
+      
   # find libGL, libGLU 
   GLLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so  -print 2>/dev/null)
   GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null)
diff --git a/Samples/5_Domain_Specific/postProcessGL/postProcessGL_vs2017.vcxproj b/Samples/5_Domain_Specific/postProcessGL/postProcessGL_vs2017.vcxproj
index 015eb83d..23cf0406 100644
--- a/Samples/5_Domain_Specific/postProcessGL/postProcessGL_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/postProcessGL/postProcessGL_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/postProcessGL.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -118,6 +118,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/postProcessGL/postProcessGL_vs2019.vcxproj b/Samples/5_Domain_Specific/postProcessGL/postProcessGL_vs2019.vcxproj
index 7ac06e89..08c28e4d 100644
--- a/Samples/5_Domain_Specific/postProcessGL/postProcessGL_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/postProcessGL/postProcessGL_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/postProcessGL.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -114,6 +114,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/postProcessGL/postProcessGL_vs2022.vcxproj b/Samples/5_Domain_Specific/postProcessGL/postProcessGL_vs2022.vcxproj
index b549c092..b70819ba 100644
--- a/Samples/5_Domain_Specific/postProcessGL/postProcessGL_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/postProcessGL/postProcessGL_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/postProcessGL.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -114,6 +114,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/quasirandomGenerator/Makefile b/Samples/5_Domain_Specific/quasirandomGenerator/Makefile
index 3bc715cb..04628cd3 100644
--- a/Samples/5_Domain_Specific/quasirandomGenerator/Makefile
+++ b/Samples/5_Domain_Specific/quasirandomGenerator/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/quasirandomGenerator/NsightEclipse.xml b/Samples/5_Domain_Specific/quasirandomGenerator/NsightEclipse.xml
index b66fab73..c35eaa11 100644
--- a/Samples/5_Domain_Specific/quasirandomGenerator/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/quasirandomGenerator/NsightEclipse.xml
@@ -3,12 +3,12 @@
 <entry>
   <name>quasirandomGenerator</name>
   <cuda_api_list>
-    <toolkit>cudaMemcpyToSymbol</toolkit>
-    <toolkit>cudaMemset</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaFree</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaMemset</toolkit>
+    <toolkit>cudaMemcpyToSymbol</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample implements Niederreiter Quasirandom Sequence Generator and Inverse Cumulative Normal Distribution functions for the generation of Standard Normal Distributions.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -49,6 +49,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/5_Domain_Specific/quasirandomGenerator/README.md b/Samples/5_Domain_Specific/quasirandomGenerator/README.md
index 8abc9977..54afdbba 100644
--- a/Samples/5_Domain_Specific/quasirandomGenerator/README.md
+++ b/Samples/5_Domain_Specific/quasirandomGenerator/README.md
@@ -10,7 +10,7 @@ Computational Finance
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemcpyToSymbol, cudaMemset, cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaMemcpy
+cudaMemcpy, cudaFree, cudaDeviceSynchronize, cudaMemset, cudaMemcpyToSymbol, cudaMalloc
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator_vs2017.vcxproj b/Samples/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator_vs2017.vcxproj
index c23b1172..be6fcdae 100644
--- a/Samples/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/quasirandomGenerator.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -109,6 +109,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator_vs2019.vcxproj b/Samples/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator_vs2019.vcxproj
index 4d1c0c50..a7083425 100644
--- a/Samples/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/quasirandomGenerator.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator_vs2022.vcxproj b/Samples/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator_vs2022.vcxproj
index dbec91a6..0861b23a 100644
--- a/Samples/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/quasirandomGenerator.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/README.md b/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/README.md
index 3e20ffa0..c91d1a24 100644
--- a/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/README.md
+++ b/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/README.md
@@ -10,7 +10,7 @@ Computational Finance, Runtime Compilation
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
-cuMemFree, cuMemcpyDtoH, cuMemAlloc
+cuMemcpyDtoH, cuMemAlloc, cuMemFree
 
 ## Dependencies needed to build/run
 [NVRTC](../../../README.md#nvrtc)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_nvrtc_vs2017.vcxproj b/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_nvrtc_vs2017.vcxproj
index b07a5e07..3faf8550 100644
--- a/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_nvrtc_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_nvrtc_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -110,6 +110,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_nvrtc_vs2019.vcxproj b/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_nvrtc_vs2019.vcxproj
index e11918b8..cb8893b8 100644
--- a/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_nvrtc_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_nvrtc_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -106,6 +106,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_nvrtc_vs2022.vcxproj b/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_nvrtc_vs2022.vcxproj
index 0bdb2adb..9dc93ac6 100644
--- a/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_nvrtc_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_nvrtc_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -106,6 +106,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/recursiveGaussian/Makefile b/Samples/5_Domain_Specific/recursiveGaussian/Makefile
index 011149f3..f228dea7 100644
--- a/Samples/5_Domain_Specific/recursiveGaussian/Makefile
+++ b/Samples/5_Domain_Specific/recursiveGaussian/Makefile
@@ -299,9 +299,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/recursiveGaussian/NsightEclipse.xml b/Samples/5_Domain_Specific/recursiveGaussian/NsightEclipse.xml
index 00c1b1b7..624f69c5 100644
--- a/Samples/5_Domain_Specific/recursiveGaussian/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/recursiveGaussian/NsightEclipse.xml
@@ -3,17 +3,17 @@
 <entry>
   <name>recursiveGaussian</name>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaGraphicsMapResources</toolkit>
-    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
-    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaGraphicsUnregisterResource</toolkit>
     <toolkit>cudaGraphicsUnmapResources</toolkit>
     <toolkit>cudaMemcpy</toolkit>
-    <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
+    <toolkit>cudaGraphicsMapResources</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
     <toolkit>cudaGetDevice</toolkit>
+    <toolkit>cudaGraphicsUnregisterResource</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
+    <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample implements a Gaussian blur using Deriche's recursive method. The advantage of this method is that the execution time is independent of the filter width.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -84,6 +84,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/5_Domain_Specific/recursiveGaussian/README.md b/Samples/5_Domain_Specific/recursiveGaussian/README.md
index 50d33152..9e1475c4 100644
--- a/Samples/5_Domain_Specific/recursiveGaussian/README.md
+++ b/Samples/5_Domain_Specific/recursiveGaussian/README.md
@@ -10,7 +10,7 @@ Graphics Interop, Image Processing
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaGraphicsMapResources, cudaGraphicsGLRegisterBuffer, cudaGraphicsResourceGetMappedPointer, cudaDeviceSynchronize, cudaMalloc, cudaGraphicsUnregisterResource, cudaGraphicsUnmapResources, cudaMemcpy, cudaGetDeviceProperties, cudaGetDevice
+cudaGraphicsUnmapResources, cudaMemcpy, cudaFree, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDeviceSynchronize, cudaGetDevice, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [X11](../../../README.md#x11), [GL](../../../README.md#gl)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/recursiveGaussian/findgllib.mk b/Samples/5_Domain_Specific/recursiveGaussian/findgllib.mk
index f0a5c551..998fcf0f 100644
--- a/Samples/5_Domain_Specific/recursiveGaussian/findgllib.mk
+++ b/Samples/5_Domain_Specific/recursiveGaussian/findgllib.mk
@@ -53,11 +53,12 @@ endif
 ifeq ("$(TARGET_OS)","linux")
     # $(info) >> findgllib.mk -> LINUX path <<<)
     # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside
-    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu      >/dev/null 2>&1; echo $$?)
-    FEDORA = $(shell echo $(DISTRO) | grep -i fedora      >/dev/null 2>&1; echo $$?)
-    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
-    CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
+    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu       >/dev/null 2>&1; echo $$?)
+    FEDORA = $(shell echo $(DISTRO) | grep -i fedora       >/dev/null 2>&1; echo $$?)
+    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel'  >/dev/null 2>&1; echo $$?)
+    CENTOS = $(shell echo $(DISTRO) | grep -i centos       >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux")
         DFLT_PATH ?= /usr/lib
       endif
     endif
+
     ifeq ("$(SUSE)","0")
       GLPATH    ?= /usr/X11R6/lib64
       GLLINK    ?= -L/usr/X11R6/lib64
       DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(FEDORA)","0")
+    else 
       GLPATH    ?= /usr/lib64/nvidia
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
-    ifeq ("$(RHEL)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(CENTOS)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-  
+      
   # find libGL, libGLU 
   GLLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so  -print 2>/dev/null)
   GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null)
diff --git a/Samples/5_Domain_Specific/recursiveGaussian/recursiveGaussian_vs2017.vcxproj b/Samples/5_Domain_Specific/recursiveGaussian/recursiveGaussian_vs2017.vcxproj
index e6c68d03..f706030a 100644
--- a/Samples/5_Domain_Specific/recursiveGaussian/recursiveGaussian_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/recursiveGaussian/recursiveGaussian_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/recursiveGaussian.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -118,6 +118,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/recursiveGaussian/recursiveGaussian_vs2019.vcxproj b/Samples/5_Domain_Specific/recursiveGaussian/recursiveGaussian_vs2019.vcxproj
index a0de3445..72663de2 100644
--- a/Samples/5_Domain_Specific/recursiveGaussian/recursiveGaussian_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/recursiveGaussian/recursiveGaussian_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/recursiveGaussian.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -114,6 +114,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/recursiveGaussian/recursiveGaussian_vs2022.vcxproj b/Samples/5_Domain_Specific/recursiveGaussian/recursiveGaussian_vs2022.vcxproj
index 44739af7..947a471c 100644
--- a/Samples/5_Domain_Specific/recursiveGaussian/recursiveGaussian_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/recursiveGaussian/recursiveGaussian_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/recursiveGaussian.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -114,6 +114,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/simpleD3D10/README.md b/Samples/5_Domain_Specific/simpleD3D10/README.md
index cf446dc4..a9d7cde7 100644
--- a/Samples/5_Domain_Specific/simpleD3D10/README.md
+++ b/Samples/5_Domain_Specific/simpleD3D10/README.md
@@ -10,7 +10,7 @@ Graphics Interop, 3D Graphics
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaGraphicsMapResources, cudaGraphicsUnregisterResource, cudaGraphicsResourceGetMappedPointer, cudaGetDeviceCount, cudaGetLastError, cudaGraphicsUnmapResources, cudaGetErrorString, cudaGetDeviceProperties
+cudaGraphicsUnmapResources, cudaGetErrorString, cudaGraphicsResourceGetMappedPointer, cudaGetLastError, cudaGraphicsMapResources, cudaGetDeviceCount, cudaGraphicsUnregisterResource, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [DirectX](../../../README.md#directx)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/simpleD3D10/simpleD3D10_vs2017.vcxproj b/Samples/5_Domain_Specific/simpleD3D10/simpleD3D10_vs2017.vcxproj
index b5bbaf2d..4dadd193 100644
--- a/Samples/5_Domain_Specific/simpleD3D10/simpleD3D10_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/simpleD3D10/simpleD3D10_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleD3D10.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -109,6 +109,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/simpleD3D10/simpleD3D10_vs2019.vcxproj b/Samples/5_Domain_Specific/simpleD3D10/simpleD3D10_vs2019.vcxproj
index 3b3f6b28..1dca8e14 100644
--- a/Samples/5_Domain_Specific/simpleD3D10/simpleD3D10_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/simpleD3D10/simpleD3D10_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleD3D10.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/simpleD3D10/simpleD3D10_vs2022.vcxproj b/Samples/5_Domain_Specific/simpleD3D10/simpleD3D10_vs2022.vcxproj
index 738f55bc..630c0ea1 100644
--- a/Samples/5_Domain_Specific/simpleD3D10/simpleD3D10_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/simpleD3D10/simpleD3D10_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleD3D10.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/simpleD3D10RenderTarget/README.md b/Samples/5_Domain_Specific/simpleD3D10RenderTarget/README.md
index a8b68908..49077dc7 100644
--- a/Samples/5_Domain_Specific/simpleD3D10RenderTarget/README.md
+++ b/Samples/5_Domain_Specific/simpleD3D10RenderTarget/README.md
@@ -10,7 +10,7 @@ Graphics Interop, Texture
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaGraphicsMapResources, cudaGraphicsUnregisterResource, cudaGraphicsResourceGetMappedPointer, cudaGetDeviceCount, cudaMalloc, cudaGraphicsSubResourceGetMappedArray, cudaBindTextureToArray, cudaUnbindTexture, cudaGetLastError, cudaGraphicsUnmapResources, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties
+cudaGraphicsUnmapResources, cudaMemcpy, cudaMalloc, cudaUnbindTexture, cudaGetErrorString, cudaGraphicsResourceGetMappedPointer, cudaGetLastError, cudaGraphicsMapResources, cudaGetDeviceCount, cudaGraphicsUnregisterResource, cudaGraphicsSubResourceGetMappedArray, cudaBindTextureToArray, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [DirectX](../../../README.md#directx)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/simpleD3D10RenderTarget/simpleD3D10RenderTarget_vs2017.vcxproj b/Samples/5_Domain_Specific/simpleD3D10RenderTarget/simpleD3D10RenderTarget_vs2017.vcxproj
index 19a3c9f2..08ad0dfc 100644
--- a/Samples/5_Domain_Specific/simpleD3D10RenderTarget/simpleD3D10RenderTarget_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/simpleD3D10RenderTarget/simpleD3D10RenderTarget_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleD3D10RenderTarget.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -109,6 +109,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/simpleD3D10RenderTarget/simpleD3D10RenderTarget_vs2019.vcxproj b/Samples/5_Domain_Specific/simpleD3D10RenderTarget/simpleD3D10RenderTarget_vs2019.vcxproj
index 335175e0..5478db87 100644
--- a/Samples/5_Domain_Specific/simpleD3D10RenderTarget/simpleD3D10RenderTarget_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/simpleD3D10RenderTarget/simpleD3D10RenderTarget_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleD3D10RenderTarget.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/simpleD3D10RenderTarget/simpleD3D10RenderTarget_vs2022.vcxproj b/Samples/5_Domain_Specific/simpleD3D10RenderTarget/simpleD3D10RenderTarget_vs2022.vcxproj
index 75759d68..532e9d4d 100644
--- a/Samples/5_Domain_Specific/simpleD3D10RenderTarget/simpleD3D10RenderTarget_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/simpleD3D10RenderTarget/simpleD3D10RenderTarget_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleD3D10RenderTarget.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/simpleD3D10Texture/README.md b/Samples/5_Domain_Specific/simpleD3D10Texture/README.md
index f96f34c7..c8f1a2ed 100644
--- a/Samples/5_Domain_Specific/simpleD3D10Texture/README.md
+++ b/Samples/5_Domain_Specific/simpleD3D10Texture/README.md
@@ -10,7 +10,7 @@ Graphics Interop, Texture
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemset, cudaFree, cudaGraphicsMapResources, cudaGraphicsUnregisterResource, cudaGetDeviceCount, cudaMallocPitch, cudaMalloc, cudaGraphicsSubResourceGetMappedArray, cudaGetLastError, cudaGraphicsUnmapResources, cudaGetErrorString, cudaGetDeviceProperties
+cudaGraphicsUnmapResources, cudaMalloc, cudaMallocPitch, cudaGetErrorString, cudaFree, cudaGetLastError, cudaGraphicsMapResources, cudaGetDeviceCount, cudaMemset, cudaGraphicsUnregisterResource, cudaGraphicsSubResourceGetMappedArray, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [DirectX](../../../README.md#directx)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/simpleD3D10Texture/simpleD3D10Texture_vs2017.vcxproj b/Samples/5_Domain_Specific/simpleD3D10Texture/simpleD3D10Texture_vs2017.vcxproj
index 2ad151f8..f5f7322f 100644
--- a/Samples/5_Domain_Specific/simpleD3D10Texture/simpleD3D10Texture_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/simpleD3D10Texture/simpleD3D10Texture_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleD3D10Texture.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -111,6 +111,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/simpleD3D10Texture/simpleD3D10Texture_vs2019.vcxproj b/Samples/5_Domain_Specific/simpleD3D10Texture/simpleD3D10Texture_vs2019.vcxproj
index 793e7da3..d0c81315 100644
--- a/Samples/5_Domain_Specific/simpleD3D10Texture/simpleD3D10Texture_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/simpleD3D10Texture/simpleD3D10Texture_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleD3D10Texture.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/simpleD3D10Texture/simpleD3D10Texture_vs2022.vcxproj b/Samples/5_Domain_Specific/simpleD3D10Texture/simpleD3D10Texture_vs2022.vcxproj
index aa188b08..ebc26819 100644
--- a/Samples/5_Domain_Specific/simpleD3D10Texture/simpleD3D10Texture_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/simpleD3D10Texture/simpleD3D10Texture_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleD3D10Texture.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/simpleD3D11/README.md b/Samples/5_Domain_Specific/simpleD3D11/README.md
index cfdb0c79..fadf5bf0 100644
--- a/Samples/5_Domain_Specific/simpleD3D11/README.md
+++ b/Samples/5_Domain_Specific/simpleD3D11/README.md
@@ -10,7 +10,7 @@ Graphics Interop, Image Processing
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaSignalExternalSemaphoresAsync, cudaImportVertexBuffer, cudaAcquireSync, cudaGetDeviceCount, cudaImportExternalSemaphore, cudaReleaseSync, cudaImportExternalMemory, cudaExternalMemoryGetMappedBuffer, cudaImportKeyedMutex, cudaStreamCreateWithFlags, cudaDestroyExternalSemaphore, cudaSetDevice, cudaWaitExternalSemaphoresAsync, cudaDestroyExternalMemory
+cudaImportKeyedMutex, cudaExternalMemoryGetMappedBuffer, cudaStreamCreateWithFlags, cudaWaitExternalSemaphoresAsync, cudaImportExternalSemaphore, cudaFree, cudaImportVertexBuffer, cudaReleaseSync, cudaSetDevice, cudaSignalExternalSemaphoresAsync, cudaAcquireSync, cudaDestroyExternalMemory, cudaImportExternalMemory, cudaGetDeviceCount, cudaDestroyExternalSemaphore
 
 ## Dependencies needed to build/run
 [DirectX](../../../README.md#directx)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/simpleD3D11/simpleD3D11_vs2017.vcxproj b/Samples/5_Domain_Specific/simpleD3D11/simpleD3D11_vs2017.vcxproj
index 9f0f0e9d..2a11df06 100644
--- a/Samples/5_Domain_Specific/simpleD3D11/simpleD3D11_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/simpleD3D11/simpleD3D11_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleD3D11.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -111,6 +111,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/simpleD3D11/simpleD3D11_vs2019.vcxproj b/Samples/5_Domain_Specific/simpleD3D11/simpleD3D11_vs2019.vcxproj
index af23ccc6..f3dbb2e0 100644
--- a/Samples/5_Domain_Specific/simpleD3D11/simpleD3D11_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/simpleD3D11/simpleD3D11_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleD3D11.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/simpleD3D11/simpleD3D11_vs2022.vcxproj b/Samples/5_Domain_Specific/simpleD3D11/simpleD3D11_vs2022.vcxproj
index b29aa4bb..7338f410 100644
--- a/Samples/5_Domain_Specific/simpleD3D11/simpleD3D11_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/simpleD3D11/simpleD3D11_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleD3D11.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/simpleD3D11Texture/README.md b/Samples/5_Domain_Specific/simpleD3D11Texture/README.md
index da8a3875..9c4cf954 100644
--- a/Samples/5_Domain_Specific/simpleD3D11Texture/README.md
+++ b/Samples/5_Domain_Specific/simpleD3D11Texture/README.md
@@ -10,7 +10,7 @@ Graphics Interop, Image Processing
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemset, cudaFree, cudaGraphicsMapResources, cudaGraphicsUnregisterResource, cudaGetDeviceCount, cudaMallocPitch, cudaMalloc, cudaGraphicsSubResourceGetMappedArray, cudaGetLastError, cudaGraphicsUnmapResources, cudaGetErrorString, cudaGetDeviceProperties
+cudaGraphicsUnmapResources, cudaMalloc, cudaMallocPitch, cudaGetErrorString, cudaFree, cudaGetLastError, cudaGraphicsMapResources, cudaGetDeviceCount, cudaMemset, cudaGraphicsUnregisterResource, cudaGraphicsSubResourceGetMappedArray, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [DirectX](../../../README.md#directx)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/simpleD3D11Texture/simpleD3D11Texture_vs2017.vcxproj b/Samples/5_Domain_Specific/simpleD3D11Texture/simpleD3D11Texture_vs2017.vcxproj
index 81f6d156..025c68ff 100644
--- a/Samples/5_Domain_Specific/simpleD3D11Texture/simpleD3D11Texture_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/simpleD3D11Texture/simpleD3D11Texture_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleD3D11Texture.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -112,6 +112,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/simpleD3D11Texture/simpleD3D11Texture_vs2019.vcxproj b/Samples/5_Domain_Specific/simpleD3D11Texture/simpleD3D11Texture_vs2019.vcxproj
index e2b4e089..67799b21 100644
--- a/Samples/5_Domain_Specific/simpleD3D11Texture/simpleD3D11Texture_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/simpleD3D11Texture/simpleD3D11Texture_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleD3D11Texture.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/simpleD3D11Texture/simpleD3D11Texture_vs2022.vcxproj b/Samples/5_Domain_Specific/simpleD3D11Texture/simpleD3D11Texture_vs2022.vcxproj
index 9b06a4fd..6345c10c 100644
--- a/Samples/5_Domain_Specific/simpleD3D11Texture/simpleD3D11Texture_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/simpleD3D11Texture/simpleD3D11Texture_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleD3D11Texture.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/simpleD3D12/NsightEclipse.xml b/Samples/5_Domain_Specific/simpleD3D12/NsightEclipse.xml
index f1843ba8..0e142a5a 100644
--- a/Samples/5_Domain_Specific/simpleD3D12/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/simpleD3D12/NsightEclipse.xml
@@ -3,19 +3,19 @@
 <entry>
   <name>simpleD3D12</name>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaSignalExternalSemaphoresAsync</toolkit>
-    <toolkit>cudaStreamCreate</toolkit>
-    <toolkit>cudaGetDeviceCount</toolkit>
-    <toolkit>cudaImportExternalSemaphore</toolkit>
-    <toolkit>cudaGetDeviceProperties</toolkit>
-    <toolkit>cudaImportExternalMemory</toolkit>
-    <toolkit>cudaExternalMemoryGetMappedBuffer</toolkit>
-    <toolkit>cudaDestroyExternalSemaphore</toolkit>
-    <toolkit>cudaSetDevice</toolkit>
     <toolkit>cudaWaitExternalSemaphoresAsync</toolkit>
+    <toolkit>cudaExternalMemoryGetMappedBuffer</toolkit>
+    <toolkit>cudaImportExternalSemaphore</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaSetDevice</toolkit>
+    <toolkit>cudaSignalExternalSemaphoresAsync</toolkit>
+    <toolkit>cudaGetDeviceProperties</toolkit>
     <toolkit>cudaStreamSynchronize</toolkit>
     <toolkit>cudaDestroyExternalMemory</toolkit>
+    <toolkit>cudaStreamCreate</toolkit>
+    <toolkit>cudaImportExternalMemory</toolkit>
+    <toolkit>cudaGetDeviceCount</toolkit>
+    <toolkit>cudaDestroyExternalSemaphore</toolkit>
   </cuda_api_list>
   <description><![CDATA[A program which demonstrates Direct3D12 interoperability with CUDA.  The program creates a sinewave in DX12 vertex buffer which is created using CUDA kernels. DX12 and CUDA synchronizes using DirectX12 Fences. Direct3D then renders the results on the screen.  A DirectX12 Capable NVIDIA GPU is required on Windows10 or higher OS.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -59,6 +59,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <platform>windows10</platform>
diff --git a/Samples/5_Domain_Specific/simpleD3D12/README.md b/Samples/5_Domain_Specific/simpleD3D12/README.md
index 5dc8a446..2e472bf0 100644
--- a/Samples/5_Domain_Specific/simpleD3D12/README.md
+++ b/Samples/5_Domain_Specific/simpleD3D12/README.md
@@ -10,7 +10,7 @@ Graphics Interop, CUDA DX12 Interop, Image Processing
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaSignalExternalSemaphoresAsync, cudaStreamCreate, cudaGetDeviceCount, cudaImportExternalSemaphore, cudaGetDeviceProperties, cudaImportExternalMemory, cudaExternalMemoryGetMappedBuffer, cudaDestroyExternalSemaphore, cudaSetDevice, cudaWaitExternalSemaphoresAsync, cudaStreamSynchronize, cudaDestroyExternalMemory
+cudaWaitExternalSemaphoresAsync, cudaExternalMemoryGetMappedBuffer, cudaImportExternalSemaphore, cudaFree, cudaSetDevice, cudaSignalExternalSemaphoresAsync, cudaGetDeviceProperties, cudaStreamSynchronize, cudaDestroyExternalMemory, cudaStreamCreate, cudaImportExternalMemory, cudaGetDeviceCount, cudaDestroyExternalSemaphore
 
 ## Dependencies needed to build/run
 [DirectX12](../../../README.md#directx12)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/simpleD3D12/simpleD3D12_vs2017.vcxproj b/Samples/5_Domain_Specific/simpleD3D12/simpleD3D12_vs2017.vcxproj
index b75c6dae..35882386 100644
--- a/Samples/5_Domain_Specific/simpleD3D12/simpleD3D12_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/simpleD3D12/simpleD3D12_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleD3D12.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -119,6 +119,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/simpleD3D12/simpleD3D12_vs2019.vcxproj b/Samples/5_Domain_Specific/simpleD3D12/simpleD3D12_vs2019.vcxproj
index 44e46df3..32c9763c 100644
--- a/Samples/5_Domain_Specific/simpleD3D12/simpleD3D12_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/simpleD3D12/simpleD3D12_vs2019.vcxproj
@@ -39,7 +39,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -68,7 +68,7 @@
       <OutputFile>$(OutDir)/simpleD3D12.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -120,6 +120,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/simpleD3D12/simpleD3D12_vs2022.vcxproj b/Samples/5_Domain_Specific/simpleD3D12/simpleD3D12_vs2022.vcxproj
index 5a793f3c..a7462092 100644
--- a/Samples/5_Domain_Specific/simpleD3D12/simpleD3D12_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/simpleD3D12/simpleD3D12_vs2022.vcxproj
@@ -39,7 +39,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -68,7 +68,7 @@
       <OutputFile>$(OutDir)/simpleD3D12.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -120,6 +120,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/simpleD3D9/README.md b/Samples/5_Domain_Specific/simpleD3D9/README.md
index 4a9ea67b..708a6cf3 100644
--- a/Samples/5_Domain_Specific/simpleD3D9/README.md
+++ b/Samples/5_Domain_Specific/simpleD3D9/README.md
@@ -10,7 +10,7 @@ Graphics Interop
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaGraphicsMapResources, cudaGraphicsUnregisterResource, cudaGraphicsResourceGetMappedPointer, cudaGetLastError, cudaGraphicsUnmapResources
+cudaGraphicsUnmapResources, cudaGraphicsResourceGetMappedPointer, cudaGetLastError, cudaGraphicsMapResources, cudaGraphicsUnregisterResource
 
 ## Dependencies needed to build/run
 [DirectX](../../../README.md#directx)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/simpleD3D9/simpleD3D9_vs2017.vcxproj b/Samples/5_Domain_Specific/simpleD3D9/simpleD3D9_vs2017.vcxproj
index bea08338..513a7b28 100644
--- a/Samples/5_Domain_Specific/simpleD3D9/simpleD3D9_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/simpleD3D9/simpleD3D9_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleD3D9.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -109,6 +109,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/simpleD3D9/simpleD3D9_vs2019.vcxproj b/Samples/5_Domain_Specific/simpleD3D9/simpleD3D9_vs2019.vcxproj
index 9089cbf7..6c61823a 100644
--- a/Samples/5_Domain_Specific/simpleD3D9/simpleD3D9_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/simpleD3D9/simpleD3D9_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleD3D9.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/simpleD3D9/simpleD3D9_vs2022.vcxproj b/Samples/5_Domain_Specific/simpleD3D9/simpleD3D9_vs2022.vcxproj
index ffda5e78..3389eb90 100644
--- a/Samples/5_Domain_Specific/simpleD3D9/simpleD3D9_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/simpleD3D9/simpleD3D9_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleD3D9.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/simpleD3D9Texture/README.md b/Samples/5_Domain_Specific/simpleD3D9Texture/README.md
index 96dffa34..51bf5c0b 100644
--- a/Samples/5_Domain_Specific/simpleD3D9Texture/README.md
+++ b/Samples/5_Domain_Specific/simpleD3D9Texture/README.md
@@ -10,7 +10,7 @@ Graphics Interop, Texture
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemset, cudaFree, cudaGraphicsMapResources, cudaGraphicsUnregisterResource, cudaMallocPitch, cudaMalloc, cudaGraphicsSubResourceGetMappedArray, cudaGetLastError, cudaGraphicsUnmapResources
+cudaGraphicsUnmapResources, cudaMalloc, cudaMallocPitch, cudaFree, cudaGetLastError, cudaGraphicsMapResources, cudaMemset, cudaGraphicsUnregisterResource, cudaGraphicsSubResourceGetMappedArray
 
 ## Dependencies needed to build/run
 [DirectX](../../../README.md#directx)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/simpleD3D9Texture/simpleD3D9Texture_vs2017.vcxproj b/Samples/5_Domain_Specific/simpleD3D9Texture/simpleD3D9Texture_vs2017.vcxproj
index 5e316cdc..ab2a79d1 100644
--- a/Samples/5_Domain_Specific/simpleD3D9Texture/simpleD3D9Texture_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/simpleD3D9Texture/simpleD3D9Texture_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleD3D9Texture.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -111,6 +111,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/simpleD3D9Texture/simpleD3D9Texture_vs2019.vcxproj b/Samples/5_Domain_Specific/simpleD3D9Texture/simpleD3D9Texture_vs2019.vcxproj
index 3280f464..53853db5 100644
--- a/Samples/5_Domain_Specific/simpleD3D9Texture/simpleD3D9Texture_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/simpleD3D9Texture/simpleD3D9Texture_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleD3D9Texture.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/simpleD3D9Texture/simpleD3D9Texture_vs2022.vcxproj b/Samples/5_Domain_Specific/simpleD3D9Texture/simpleD3D9Texture_vs2022.vcxproj
index 0654afd2..de004fdc 100644
--- a/Samples/5_Domain_Specific/simpleD3D9Texture/simpleD3D9Texture_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/simpleD3D9Texture/simpleD3D9Texture_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleD3D9Texture.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/simpleGL/Makefile b/Samples/5_Domain_Specific/simpleGL/Makefile
index ad666c8a..47d5ed80 100644
--- a/Samples/5_Domain_Specific/simpleGL/Makefile
+++ b/Samples/5_Domain_Specific/simpleGL/Makefile
@@ -299,9 +299,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/simpleGL/NsightEclipse.xml b/Samples/5_Domain_Specific/simpleGL/NsightEclipse.xml
index 228af026..0b73f33b 100644
--- a/Samples/5_Domain_Specific/simpleGL/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/simpleGL/NsightEclipse.xml
@@ -3,15 +3,15 @@
 <entry>
   <name>simpleGL</name>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaGraphicsMapResources</toolkit>
-    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
-    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaGraphicsUnregisterResource</toolkit>
     <toolkit>cudaGraphicsUnmapResources</toolkit>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
+    <toolkit>cudaGraphicsMapResources</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaGraphicsUnregisterResource</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
   </cuda_api_list>
   <description><![CDATA[Simple program which demonstrates interoperability between CUDA and OpenGL. The program modifies vertex positions with CUDA and uses OpenGL to render the geometry.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -70,6 +70,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/5_Domain_Specific/simpleGL/README.md b/Samples/5_Domain_Specific/simpleGL/README.md
index e1fc492b..5fc03527 100644
--- a/Samples/5_Domain_Specific/simpleGL/README.md
+++ b/Samples/5_Domain_Specific/simpleGL/README.md
@@ -10,7 +10,7 @@ Graphics Interop, Vertex Buffers, 3D Graphics
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaGraphicsMapResources, cudaGraphicsGLRegisterBuffer, cudaGraphicsResourceGetMappedPointer, cudaDeviceSynchronize, cudaMalloc, cudaGraphicsUnregisterResource, cudaGraphicsUnmapResources, cudaMemcpy
+cudaGraphicsUnmapResources, cudaMemcpy, cudaFree, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDeviceSynchronize, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer
 
 ## Dependencies needed to build/run
 [X11](../../../README.md#x11), [GL](../../../README.md#gl)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/simpleGL/findgllib.mk b/Samples/5_Domain_Specific/simpleGL/findgllib.mk
index f0a5c551..998fcf0f 100644
--- a/Samples/5_Domain_Specific/simpleGL/findgllib.mk
+++ b/Samples/5_Domain_Specific/simpleGL/findgllib.mk
@@ -53,11 +53,12 @@ endif
 ifeq ("$(TARGET_OS)","linux")
     # $(info) >> findgllib.mk -> LINUX path <<<)
     # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside
-    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu      >/dev/null 2>&1; echo $$?)
-    FEDORA = $(shell echo $(DISTRO) | grep -i fedora      >/dev/null 2>&1; echo $$?)
-    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
-    CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
+    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu       >/dev/null 2>&1; echo $$?)
+    FEDORA = $(shell echo $(DISTRO) | grep -i fedora       >/dev/null 2>&1; echo $$?)
+    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel'  >/dev/null 2>&1; echo $$?)
+    CENTOS = $(shell echo $(DISTRO) | grep -i centos       >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux")
         DFLT_PATH ?= /usr/lib
       endif
     endif
+
     ifeq ("$(SUSE)","0")
       GLPATH    ?= /usr/X11R6/lib64
       GLLINK    ?= -L/usr/X11R6/lib64
       DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(FEDORA)","0")
+    else 
       GLPATH    ?= /usr/lib64/nvidia
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
-    ifeq ("$(RHEL)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(CENTOS)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-  
+      
   # find libGL, libGLU 
   GLLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so  -print 2>/dev/null)
   GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null)
diff --git a/Samples/5_Domain_Specific/simpleGL/simpleGL_vs2017.vcxproj b/Samples/5_Domain_Specific/simpleGL/simpleGL_vs2017.vcxproj
index cf0f4bdc..89aab5d9 100644
--- a/Samples/5_Domain_Specific/simpleGL/simpleGL_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/simpleGL/simpleGL_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleGL.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -117,6 +117,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/simpleGL/simpleGL_vs2019.vcxproj b/Samples/5_Domain_Specific/simpleGL/simpleGL_vs2019.vcxproj
index 67bdb231..934fb116 100644
--- a/Samples/5_Domain_Specific/simpleGL/simpleGL_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/simpleGL/simpleGL_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleGL.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -113,6 +113,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/simpleGL/simpleGL_vs2022.vcxproj b/Samples/5_Domain_Specific/simpleGL/simpleGL_vs2022.vcxproj
index f6a41b0b..e571db13 100644
--- a/Samples/5_Domain_Specific/simpleGL/simpleGL_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/simpleGL/simpleGL_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleGL.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -113,6 +113,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/simpleGLES/Makefile b/Samples/5_Domain_Specific/simpleGLES/Makefile
index 51e32773..6e0e516e 100644
--- a/Samples/5_Domain_Specific/simpleGLES/Makefile
+++ b/Samples/5_Domain_Specific/simpleGLES/Makefile
@@ -313,9 +313,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/simpleGLES/NsightEclipse.xml b/Samples/5_Domain_Specific/simpleGLES/NsightEclipse.xml
index 93fd9c15..fc5a25be 100644
--- a/Samples/5_Domain_Specific/simpleGLES/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/simpleGLES/NsightEclipse.xml
@@ -8,15 +8,15 @@
     <flag>-DUSE_GLES</flag>
   </cflags>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaGraphicsMapResources</toolkit>
-    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
-    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaGraphicsUnregisterResource</toolkit>
     <toolkit>cudaGraphicsUnmapResources</toolkit>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
+    <toolkit>cudaGraphicsMapResources</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaGraphicsUnregisterResource</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
   </cuda_api_list>
   <description><![CDATA[Demonstrates data exchange between CUDA and OpenGL ES (aka Graphics interop). The program modifies vertex positions with CUDA and uses OpenGL ES to render the geometry.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -66,6 +66,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <sources>
     <ignore>graphics_interface.c</ignore>
   </sources>
diff --git a/Samples/5_Domain_Specific/simpleGLES/README.md b/Samples/5_Domain_Specific/simpleGLES/README.md
index 454218cf..50644d74 100644
--- a/Samples/5_Domain_Specific/simpleGLES/README.md
+++ b/Samples/5_Domain_Specific/simpleGLES/README.md
@@ -10,7 +10,7 @@ Graphics Interop, Vertex Buffers, 3D Graphics
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaGraphicsMapResources, cudaGraphicsGLRegisterBuffer, cudaGraphicsResourceGetMappedPointer, cudaDeviceSynchronize, cudaMalloc, cudaGraphicsUnregisterResource, cudaGraphicsUnmapResources, cudaMemcpy
+cudaGraphicsUnmapResources, cudaMemcpy, cudaFree, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDeviceSynchronize, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer
 
 ## Dependencies needed to build/run
 [X11](../../../README.md#x11), [GLES](../../../README.md#gles)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/simpleGLES/findgleslib.mk b/Samples/5_Domain_Specific/simpleGLES/findgleslib.mk
index bcb335c1..6da2f078 100644
--- a/Samples/5_Domain_Specific/simpleGLES/findgleslib.mk
+++ b/Samples/5_Domain_Specific/simpleGLES/findgleslib.mk
@@ -60,6 +60,7 @@ ifeq ("$(TARGET_OS)","linux")
     RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
     CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -86,27 +87,17 @@ ifeq ("$(TARGET_OS)","linux")
         DFLT_PATH ?= /usr/lib
       endif
     endif
-    ifeq ("$(SUSE)","0")
+
+    ifeq ("$(SUSE)","0")
       GLPATH    ?= /usr/X11R6/lib64
       GLLINK    ?= -L/usr/X11R6/lib64
       DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(FEDORA)","0")
+    else 
       GLPATH    ?= /usr/lib64/nvidia
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
-    ifeq ("$(RHEL)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(CENTOS)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-  
+	  
   # find libGL, libGLU, libXi, 
   EGLLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libEGL.so    -print 2>/dev/null)
   GLESLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLESv2.so -print 2>/dev/null)
diff --git a/Samples/5_Domain_Specific/simpleGLES_EGLOutput/Makefile b/Samples/5_Domain_Specific/simpleGLES_EGLOutput/Makefile
index d3e12b7a..7debcfaa 100644
--- a/Samples/5_Domain_Specific/simpleGLES_EGLOutput/Makefile
+++ b/Samples/5_Domain_Specific/simpleGLES_EGLOutput/Makefile
@@ -313,9 +313,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/simpleGLES_EGLOutput/NsightEclipse.xml b/Samples/5_Domain_Specific/simpleGLES_EGLOutput/NsightEclipse.xml
index 27cd4982..a12a32e0 100644
--- a/Samples/5_Domain_Specific/simpleGLES_EGLOutput/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/simpleGLES_EGLOutput/NsightEclipse.xml
@@ -10,15 +10,15 @@
     <flag>-I/usr/include/drm</flag>
   </cflags>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaGraphicsMapResources</toolkit>
-    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
-    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaGraphicsUnregisterResource</toolkit>
     <toolkit>cudaGraphicsUnmapResources</toolkit>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
+    <toolkit>cudaGraphicsMapResources</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaGraphicsUnregisterResource</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
   </cuda_api_list>
   <description><![CDATA[Demonstrates data exchange between CUDA and OpenGL ES (aka Graphics interop). The program modifies vertex positions with CUDA and uses OpenGL ES to render the geometry, and shows how to render directly to the display using the EGLOutput mechanism and the DRM library.
 
@@ -75,6 +75,7 @@ $ sudo modprobe nvidia-drm modeset=1
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <sources>
     <ignore>graphics_interface_egloutput_via_egl.c</ignore>
   </sources>
diff --git a/Samples/5_Domain_Specific/simpleGLES_EGLOutput/README.md b/Samples/5_Domain_Specific/simpleGLES_EGLOutput/README.md
index c468f0bb..f11b2411 100644
--- a/Samples/5_Domain_Specific/simpleGLES_EGLOutput/README.md
+++ b/Samples/5_Domain_Specific/simpleGLES_EGLOutput/README.md
@@ -15,7 +15,7 @@ Graphics Interop, Vertex Buffers, 3D Graphics
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -28,14 +28,14 @@ armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaGraphicsMapResources, cudaGraphicsGLRegisterBuffer, cudaGraphicsResourceGetMappedPointer, cudaDeviceSynchronize, cudaMalloc, cudaGraphicsUnregisterResource, cudaGraphicsUnmapResources, cudaMemcpy
+cudaGraphicsUnmapResources, cudaMemcpy, cudaFree, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDeviceSynchronize, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer
 
 ## Dependencies needed to build/run
 [EGLOutput](../../../README.md#egloutput), [GLES](../../../README.md#gles)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/simpleGLES_EGLOutput/findgleslib.mk b/Samples/5_Domain_Specific/simpleGLES_EGLOutput/findgleslib.mk
index bcb335c1..6da2f078 100644
--- a/Samples/5_Domain_Specific/simpleGLES_EGLOutput/findgleslib.mk
+++ b/Samples/5_Domain_Specific/simpleGLES_EGLOutput/findgleslib.mk
@@ -60,6 +60,7 @@ ifeq ("$(TARGET_OS)","linux")
     RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
     CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -86,27 +87,17 @@ ifeq ("$(TARGET_OS)","linux")
         DFLT_PATH ?= /usr/lib
       endif
     endif
-    ifeq ("$(SUSE)","0")
+
+    ifeq ("$(SUSE)","0")
       GLPATH    ?= /usr/X11R6/lib64
       GLLINK    ?= -L/usr/X11R6/lib64
       DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(FEDORA)","0")
+    else 
       GLPATH    ?= /usr/lib64/nvidia
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
-    ifeq ("$(RHEL)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(CENTOS)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-  
+	  
   # find libGL, libGLU, libXi, 
   EGLLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libEGL.so    -print 2>/dev/null)
   GLESLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLESv2.so -print 2>/dev/null)
diff --git a/Samples/5_Domain_Specific/simpleGLES_screen/Makefile b/Samples/5_Domain_Specific/simpleGLES_screen/Makefile
index 971ce716..2629cc49 100644
--- a/Samples/5_Domain_Specific/simpleGLES_screen/Makefile
+++ b/Samples/5_Domain_Specific/simpleGLES_screen/Makefile
@@ -320,9 +320,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/simpleGLES_screen/NsightEclipse.xml b/Samples/5_Domain_Specific/simpleGLES_screen/NsightEclipse.xml
index 342e0ce5..0b911915 100644
--- a/Samples/5_Domain_Specific/simpleGLES_screen/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/simpleGLES_screen/NsightEclipse.xml
@@ -9,15 +9,15 @@
     <flag>-DWIN_INTERFACE_CUSTOM</flag>
   </cflags>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaGraphicsMapResources</toolkit>
-    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
-    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaGraphicsUnregisterResource</toolkit>
     <toolkit>cudaGraphicsUnmapResources</toolkit>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
+    <toolkit>cudaGraphicsMapResources</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaGraphicsUnregisterResource</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
   </cuda_api_list>
   <description><![CDATA[Demonstrates data exchange between CUDA and OpenGL ES (aka Graphics interop). The program modifies vertex positions with CUDA and uses OpenGL ES to render the geometry.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -67,6 +67,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <sources>
     <ignore>graphics_interface.c</ignore>
   </sources>
diff --git a/Samples/5_Domain_Specific/simpleGLES_screen/README.md b/Samples/5_Domain_Specific/simpleGLES_screen/README.md
index cf4804fd..78f96be4 100644
--- a/Samples/5_Domain_Specific/simpleGLES_screen/README.md
+++ b/Samples/5_Domain_Specific/simpleGLES_screen/README.md
@@ -10,7 +10,7 @@ Graphics Interop, Vertex Buffers, 3D Graphics
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ aarch64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaGraphicsMapResources, cudaGraphicsGLRegisterBuffer, cudaGraphicsResourceGetMappedPointer, cudaDeviceSynchronize, cudaMalloc, cudaGraphicsUnregisterResource, cudaGraphicsUnmapResources, cudaMemcpy
+cudaGraphicsUnmapResources, cudaMemcpy, cudaFree, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDeviceSynchronize, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer
 
 ## Dependencies needed to build/run
 [screen](../../../README.md#screen), [GLES](../../../README.md#gles)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/simpleGLES_screen/findgleslib.mk b/Samples/5_Domain_Specific/simpleGLES_screen/findgleslib.mk
index bcb335c1..6da2f078 100644
--- a/Samples/5_Domain_Specific/simpleGLES_screen/findgleslib.mk
+++ b/Samples/5_Domain_Specific/simpleGLES_screen/findgleslib.mk
@@ -60,6 +60,7 @@ ifeq ("$(TARGET_OS)","linux")
     RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
     CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -86,27 +87,17 @@ ifeq ("$(TARGET_OS)","linux")
         DFLT_PATH ?= /usr/lib
       endif
     endif
-    ifeq ("$(SUSE)","0")
+
+    ifeq ("$(SUSE)","0")
       GLPATH    ?= /usr/X11R6/lib64
       GLLINK    ?= -L/usr/X11R6/lib64
       DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(FEDORA)","0")
+    else 
       GLPATH    ?= /usr/lib64/nvidia
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
-    ifeq ("$(RHEL)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(CENTOS)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-  
+	  
   # find libGL, libGLU, libXi, 
   EGLLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libEGL.so    -print 2>/dev/null)
   GLESLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLESv2.so -print 2>/dev/null)
diff --git a/Samples/5_Domain_Specific/simpleVulkan/Makefile b/Samples/5_Domain_Specific/simpleVulkan/Makefile
index 83f57ad6..8b5cfd46 100644
--- a/Samples/5_Domain_Specific/simpleVulkan/Makefile
+++ b/Samples/5_Domain_Specific/simpleVulkan/Makefile
@@ -338,9 +338,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/simpleVulkan/NsightEclipse.xml b/Samples/5_Domain_Specific/simpleVulkan/NsightEclipse.xml
index a0ec1c56..1d9449e7 100644
--- a/Samples/5_Domain_Specific/simpleVulkan/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/simpleVulkan/NsightEclipse.xml
@@ -6,24 +6,24 @@
     <flag>--std=c++11</flag>
   </cflags>
   <cuda_api_list>
-    <toolkit>cudaTimelineSemaphore</toolkit>
-    <toolkit>cudaSignalExternalSemaphoresAsync</toolkit>
-    <toolkit>cudaOccupancyMaxActiveBlocksPerMultiprocessor</toolkit>
-    <toolkit>cudaGetDeviceCount</toolkit>
-    <toolkit>cudaImportExternalSemaphore</toolkit>
-    <toolkit>cudaVertMem</toolkit>
-    <toolkit>cudaImportExternalMemory</toolkit>
-    <toolkit>cudaDestroyExternalMemory</toolkit>
+    <toolkit>cudaStreamCreateWithFlags</toolkit>
     <toolkit>cudaExternalMemoryGetMappedBuffer</toolkit>
     <toolkit>cudaSignalSemaphore</toolkit>
-    <toolkit>cudaStreamCreateWithFlags</toolkit>
+    <toolkit>cudaWaitExternalSemaphoresAsync</toolkit>
+    <toolkit>cudaVertMem</toolkit>
+    <toolkit>cudaImportExternalSemaphore</toolkit>
     <toolkit>cudaWaitSemaphore</toolkit>
     <toolkit>cudaHeightMap</toolkit>
-    <toolkit>cudaDestroyExternalSemaphore</toolkit>
     <toolkit>cudaSetDevice</toolkit>
-    <toolkit>cudaWaitExternalSemaphoresAsync</toolkit>
+    <toolkit>cudaGetDeviceCount</toolkit>
+    <toolkit>cudaSignalExternalSemaphoresAsync</toolkit>
+    <toolkit>cudaTimelineSemaphore</toolkit>
     <toolkit>cudaStreamSynchronize</toolkit>
+    <toolkit>cudaDestroyExternalMemory</toolkit>
+    <toolkit>cudaOccupancyMaxActiveBlocksPerMultiprocessor</toolkit>
+    <toolkit>cudaImportExternalMemory</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaDestroyExternalSemaphore</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates Vulkan CUDA Interop. CUDA imports the Vulkan vertex buffer and operates on it to create sinewave, and synchronizes with Vulkan through vulkan semaphores imported by CUDA. This sample depends on Vulkan SDK, GLFW3 libraries, for building this sample please refer to "Build_instructions.txt" provided in this sample's directory]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -75,6 +75,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/5_Domain_Specific/simpleVulkan/README.md b/Samples/5_Domain_Specific/simpleVulkan/README.md
index 27e2dd04..fd287027 100644
--- a/Samples/5_Domain_Specific/simpleVulkan/README.md
+++ b/Samples/5_Domain_Specific/simpleVulkan/README.md
@@ -10,7 +10,7 @@ Graphics Interop, CUDA Vulkan Interop, Data Parallel Algorithms
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaTimelineSemaphore, cudaSignalExternalSemaphoresAsync, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaGetDeviceCount, cudaImportExternalSemaphore, cudaVertMem, cudaImportExternalMemory, cudaDestroyExternalMemory, cudaExternalMemoryGetMappedBuffer, cudaSignalSemaphore, cudaStreamCreateWithFlags, cudaWaitSemaphore, cudaHeightMap, cudaDestroyExternalSemaphore, cudaSetDevice, cudaWaitExternalSemaphoresAsync, cudaStreamSynchronize, cudaGetDeviceProperties
+cudaStreamCreateWithFlags, cudaExternalMemoryGetMappedBuffer, cudaSignalSemaphore, cudaWaitExternalSemaphoresAsync, cudaVertMem, cudaImportExternalSemaphore, cudaWaitSemaphore, cudaHeightMap, cudaSetDevice, cudaGetDeviceCount, cudaSignalExternalSemaphoresAsync, cudaTimelineSemaphore, cudaStreamSynchronize, cudaDestroyExternalMemory, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaImportExternalMemory, cudaGetDeviceProperties, cudaDestroyExternalSemaphore
 
 ## Dependencies needed to build/run
 [X11](../../../README.md#x11), [VULKAN](../../../README.md#vulkan)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/simpleVulkan/findvulkan.mk b/Samples/5_Domain_Specific/simpleVulkan/findvulkan.mk
index 4d4e8aed..004ab22b 100644
--- a/Samples/5_Domain_Specific/simpleVulkan/findvulkan.mk
+++ b/Samples/5_Domain_Specific/simpleVulkan/findvulkan.mk
@@ -57,6 +57,7 @@ ifeq ("$(TARGET_OS)","linux")
     RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
     CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -103,6 +104,11 @@ ifeq ("$(TARGET_OS)","linux")
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
+    ifeq ("$(KYLIN)","0")
+      GLPATH    ?= /usr/lib64/nvidia
+      GLLINK    ?= -L/usr/lib64/nvidia
+      DFLT_PATH ?= /usr/lib64
+    endif
 
   VULKAN_SDK_PATH ?= ${VULKAN_SDK}
 
diff --git a/Samples/5_Domain_Specific/simpleVulkan/simpleVulkan_vs2017.vcxproj b/Samples/5_Domain_Specific/simpleVulkan/simpleVulkan_vs2017.vcxproj
index 5e7353a0..1ec49d05 100644
--- a/Samples/5_Domain_Specific/simpleVulkan/simpleVulkan_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/simpleVulkan/simpleVulkan_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleVulkan.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -121,6 +121,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/simpleVulkan/simpleVulkan_vs2019.vcxproj b/Samples/5_Domain_Specific/simpleVulkan/simpleVulkan_vs2019.vcxproj
index 55b1a4b5..4a4e5a63 100644
--- a/Samples/5_Domain_Specific/simpleVulkan/simpleVulkan_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/simpleVulkan/simpleVulkan_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleVulkan.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -117,6 +117,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/simpleVulkan/simpleVulkan_vs2022.vcxproj b/Samples/5_Domain_Specific/simpleVulkan/simpleVulkan_vs2022.vcxproj
index 9f655efa..81252a96 100644
--- a/Samples/5_Domain_Specific/simpleVulkan/simpleVulkan_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/simpleVulkan/simpleVulkan_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleVulkan.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -117,6 +117,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/simpleVulkanMMAP/Makefile b/Samples/5_Domain_Specific/simpleVulkanMMAP/Makefile
index 5a162a2b..42e9802f 100644
--- a/Samples/5_Domain_Specific/simpleVulkanMMAP/Makefile
+++ b/Samples/5_Domain_Specific/simpleVulkanMMAP/Makefile
@@ -340,9 +340,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/simpleVulkanMMAP/NsightEclipse.xml b/Samples/5_Domain_Specific/simpleVulkanMMAP/NsightEclipse.xml
index fed119ce..5f91fb68 100644
--- a/Samples/5_Domain_Specific/simpleVulkanMMAP/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/simpleVulkanMMAP/NsightEclipse.xml
@@ -6,36 +6,36 @@
     <flag>--std=c++11</flag>
   </cflags>
   <cuda_api_list>
+    <driver>cuMemCreate</driver>
+    <driver>cuMemAddressReserve</driver>
+    <driver>cuMemGetAllocationGranularity</driver>
+    <driver>cuMemAddressFree</driver>
+    <driver>cuMemUnmap</driver>
+    <driver>cuMemMap</driver>
     <driver>cuMemRelease</driver>
     <driver>cuMemExportToShareableHandle</driver>
     <driver>cuMemSetAccess</driver>
-    <driver>cuMemMap</driver>
-    <driver>cuMemCreate</driver>
-    <driver>cuMemAddressFree</driver>
-    <driver>cuMemGetAllocationGranularity</driver>
-    <driver>cuMemUnmap</driver>
-    <driver>cuMemAddressReserve</driver>
-    <toolkit>cudaOccupancyMaxActiveBlocksPerMultiprocessor</toolkit>
+    <toolkit>cudaWaitExternalSemaphoresAsync</toolkit>
     <toolkit>cudaImportExternalSemaphore</toolkit>
-    <toolkit>cudaStreamCreateWithFlags</toolkit>
-    <toolkit>cudaMemcpyAsync</toolkit>
-    <toolkit>cudaStreamDestroy</toolkit>
-    <toolkit>cudaSignalExternalSemaphoresAsync</toolkit>
-    <toolkit>cudaMallocHost</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaSetDevice</toolkit>
-    <toolkit>cudaGetDeviceProperties</toolkit>
-    <toolkit>cudaGetDeviceCount</toolkit>
     <toolkit>cudaDeviceGetAttribute</toolkit>
+    <toolkit>cudaSetDevice</toolkit>
+    <toolkit>cudaLaunchHostFunc</toolkit>
+    <toolkit>cudaMallocHost</toolkit>
+    <toolkit>cudaSignalExternalSemaphoresAsync</toolkit>
+    <toolkit>cudaFreeHost</toolkit>
+    <toolkit>cudaMemsetAsync</toolkit>
+    <toolkit>cudaMemcpyAsync</toolkit>
+    <toolkit>cudaGetDeviceCount</toolkit>
+    <toolkit>cudaStreamCreateWithFlags</toolkit>
+    <toolkit>cudaStreamDestroy</toolkit>
+    <toolkit>cudaDestroyExternalSemaphore</toolkit>
     <toolkit>cudaSignalSemaphore</toolkit>
     <toolkit>cudaWaitSemaphore</toolkit>
-    <toolkit>cudaDestroyExternalSemaphore</toolkit>
-    <toolkit>cudaStreamSynchronize</toolkit>
     <toolkit>cudaFree</toolkit>
-    <toolkit>cudaLaunchHostFunc</toolkit>
-    <toolkit>cudaMemsetAsync</toolkit>
-    <toolkit>cudaFreeHost</toolkit>
-    <toolkit>cudaWaitExternalSemaphoresAsync</toolkit>
+    <toolkit>cudaStreamSynchronize</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaOccupancyMaxActiveBlocksPerMultiprocessor</toolkit>
+    <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[ This sample demonstrates Vulkan CUDA Interop via cuMemMap APIs. CUDA exports buffers that Vulkan imports as vertex buffer. CUDA invokes kernels to operate on vertices and synchronizes with Vulkan through vulkan semaphores imported by CUDA. This sample depends on Vulkan SDK, GLFW3 libraries, for building this sample please refer to "Build_instructions.txt" provided in this sample's directory]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -91,6 +91,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <sources>
     <extracompilation>../../../Common/helper_multiprocess.cpp</extracompilation>
     <extraheader>../../../Common/helper_multiprocess.h</extraheader>
diff --git a/Samples/5_Domain_Specific/simpleVulkanMMAP/README.md b/Samples/5_Domain_Specific/simpleVulkanMMAP/README.md
index eea7f28e..3030b57b 100644
--- a/Samples/5_Domain_Specific/simpleVulkanMMAP/README.md
+++ b/Samples/5_Domain_Specific/simpleVulkanMMAP/README.md
@@ -10,7 +10,7 @@ cuMemMap IPC, MMAP, Graphics Interop, CUDA Vulkan Interop, Data Parallel Algorit
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,17 +23,17 @@ x86_64, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
-cuMemRelease, cuMemExportToShareableHandle, cuMemSetAccess, cuMemMap, cuMemCreate, cuMemAddressFree, cuMemGetAllocationGranularity, cuMemUnmap, cuMemAddressReserve
+cuMemCreate, cuMemAddressReserve, cuMemGetAllocationGranularity, cuMemAddressFree, cuMemUnmap, cuMemMap, cuMemRelease, cuMemExportToShareableHandle, cuMemSetAccess
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaImportExternalSemaphore, cudaStreamCreateWithFlags, cudaMemcpyAsync, cudaStreamDestroy, cudaSignalExternalSemaphoresAsync, cudaMallocHost, cudaMalloc, cudaSetDevice, cudaGetDeviceProperties, cudaGetDeviceCount, cudaDeviceGetAttribute, cudaSignalSemaphore, cudaWaitSemaphore, cudaDestroyExternalSemaphore, cudaStreamSynchronize, cudaFree, cudaLaunchHostFunc, cudaMemsetAsync, cudaFreeHost, cudaWaitExternalSemaphoresAsync
+cudaWaitExternalSemaphoresAsync, cudaImportExternalSemaphore, cudaDeviceGetAttribute, cudaSetDevice, cudaLaunchHostFunc, cudaMallocHost, cudaSignalExternalSemaphoresAsync, cudaFreeHost, cudaMemsetAsync, cudaMemcpyAsync, cudaGetDeviceCount, cudaStreamCreateWithFlags, cudaStreamDestroy, cudaDestroyExternalSemaphore, cudaSignalSemaphore, cudaWaitSemaphore, cudaFree, cudaStreamSynchronize, cudaMalloc, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [X11](../../../README.md#x11), [VULKAN](../../../README.md#vulkan)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/simpleVulkanMMAP/findvulkan.mk b/Samples/5_Domain_Specific/simpleVulkanMMAP/findvulkan.mk
index 4d4e8aed..004ab22b 100644
--- a/Samples/5_Domain_Specific/simpleVulkanMMAP/findvulkan.mk
+++ b/Samples/5_Domain_Specific/simpleVulkanMMAP/findvulkan.mk
@@ -57,6 +57,7 @@ ifeq ("$(TARGET_OS)","linux")
     RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
     CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -103,6 +104,11 @@ ifeq ("$(TARGET_OS)","linux")
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
+    ifeq ("$(KYLIN)","0")
+      GLPATH    ?= /usr/lib64/nvidia
+      GLLINK    ?= -L/usr/lib64/nvidia
+      DFLT_PATH ?= /usr/lib64
+    endif
 
   VULKAN_SDK_PATH ?= ${VULKAN_SDK}
 
diff --git a/Samples/5_Domain_Specific/simpleVulkanMMAP/simpleVulkanMMAP_vs2017.vcxproj b/Samples/5_Domain_Specific/simpleVulkanMMAP/simpleVulkanMMAP_vs2017.vcxproj
index ea52d956..2c6ebec4 100644
--- a/Samples/5_Domain_Specific/simpleVulkanMMAP/simpleVulkanMMAP_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/simpleVulkanMMAP/simpleVulkanMMAP_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/simpleVulkanMMAP.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -123,6 +123,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/simpleVulkanMMAP/simpleVulkanMMAP_vs2019.vcxproj b/Samples/5_Domain_Specific/simpleVulkanMMAP/simpleVulkanMMAP_vs2019.vcxproj
index b41768d6..1343dd04 100644
--- a/Samples/5_Domain_Specific/simpleVulkanMMAP/simpleVulkanMMAP_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/simpleVulkanMMAP/simpleVulkanMMAP_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleVulkanMMAP.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -119,6 +119,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/simpleVulkanMMAP/simpleVulkanMMAP_vs2022.vcxproj b/Samples/5_Domain_Specific/simpleVulkanMMAP/simpleVulkanMMAP_vs2022.vcxproj
index a13ab719..cf29fc5c 100644
--- a/Samples/5_Domain_Specific/simpleVulkanMMAP/simpleVulkanMMAP_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/simpleVulkanMMAP/simpleVulkanMMAP_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/simpleVulkanMMAP.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -119,6 +119,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/smokeParticles/Makefile b/Samples/5_Domain_Specific/smokeParticles/Makefile
index b6f4f0ff..f6afadbe 100644
--- a/Samples/5_Domain_Specific/smokeParticles/Makefile
+++ b/Samples/5_Domain_Specific/smokeParticles/Makefile
@@ -324,9 +324,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/smokeParticles/NsightEclipse.xml b/Samples/5_Domain_Specific/smokeParticles/NsightEclipse.xml
index ca2f9c07..581dba9a 100644
--- a/Samples/5_Domain_Specific/smokeParticles/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/smokeParticles/NsightEclipse.xml
@@ -6,10 +6,10 @@
     <flag>--std=c++14</flag>
   </cflags>
   <cuda_api_list>
-    <toolkit>cudaCreateTextureObject</toolkit>
     <toolkit>cudaExtent</toolkit>
-    <toolkit>cudaMemcpyToSymbol</toolkit>
     <toolkit>cudaPitchedPtr</toolkit>
+    <toolkit>cudaCreateTextureObject</toolkit>
+    <toolkit>cudaMemcpyToSymbol</toolkit>
   </cuda_api_list>
   <description><![CDATA[Smoke simulation with volumetric shadows using half-angle slicing technique. Uses CUDA for procedural simulation, Thrust Library for sorting algorithms, and OpenGL for graphics rendering.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -79,6 +79,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/5_Domain_Specific/smokeParticles/README.md b/Samples/5_Domain_Specific/smokeParticles/README.md
index e5082eba..3750d773 100644
--- a/Samples/5_Domain_Specific/smokeParticles/README.md
+++ b/Samples/5_Domain_Specific/smokeParticles/README.md
@@ -10,7 +10,7 @@ Graphics Interop, Data Parallel Algorithms, Physically-Based Simulation
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaCreateTextureObject, cudaExtent, cudaMemcpyToSymbol, cudaPitchedPtr
+cudaExtent, cudaPitchedPtr, cudaCreateTextureObject, cudaMemcpyToSymbol
 
 ## Dependencies needed to build/run
 [X11](../../../README.md#x11), [GL](../../../README.md#gl)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/smokeParticles/findgllib.mk b/Samples/5_Domain_Specific/smokeParticles/findgllib.mk
index f0a5c551..998fcf0f 100644
--- a/Samples/5_Domain_Specific/smokeParticles/findgllib.mk
+++ b/Samples/5_Domain_Specific/smokeParticles/findgllib.mk
@@ -53,11 +53,12 @@ endif
 ifeq ("$(TARGET_OS)","linux")
     # $(info) >> findgllib.mk -> LINUX path <<<)
     # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside
-    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu      >/dev/null 2>&1; echo $$?)
-    FEDORA = $(shell echo $(DISTRO) | grep -i fedora      >/dev/null 2>&1; echo $$?)
-    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
-    CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
+    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu       >/dev/null 2>&1; echo $$?)
+    FEDORA = $(shell echo $(DISTRO) | grep -i fedora       >/dev/null 2>&1; echo $$?)
+    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel'  >/dev/null 2>&1; echo $$?)
+    CENTOS = $(shell echo $(DISTRO) | grep -i centos       >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux")
         DFLT_PATH ?= /usr/lib
       endif
     endif
+
     ifeq ("$(SUSE)","0")
       GLPATH    ?= /usr/X11R6/lib64
       GLLINK    ?= -L/usr/X11R6/lib64
       DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(FEDORA)","0")
+    else 
       GLPATH    ?= /usr/lib64/nvidia
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
-    ifeq ("$(RHEL)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(CENTOS)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-  
+      
   # find libGL, libGLU 
   GLLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so  -print 2>/dev/null)
   GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null)
diff --git a/Samples/5_Domain_Specific/smokeParticles/smokeParticles_vs2017.vcxproj b/Samples/5_Domain_Specific/smokeParticles/smokeParticles_vs2017.vcxproj
index 07d6838a..1807407d 100644
--- a/Samples/5_Domain_Specific/smokeParticles/smokeParticles_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/smokeParticles/smokeParticles_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/smokeParticles.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -137,6 +137,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/smokeParticles/smokeParticles_vs2019.vcxproj b/Samples/5_Domain_Specific/smokeParticles/smokeParticles_vs2019.vcxproj
index e69a7c0b..661e6410 100644
--- a/Samples/5_Domain_Specific/smokeParticles/smokeParticles_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/smokeParticles/smokeParticles_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/smokeParticles.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -133,6 +133,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/smokeParticles/smokeParticles_vs2022.vcxproj b/Samples/5_Domain_Specific/smokeParticles/smokeParticles_vs2022.vcxproj
index 0a64cc05..d6bb21aa 100644
--- a/Samples/5_Domain_Specific/smokeParticles/smokeParticles_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/smokeParticles/smokeParticles_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/smokeParticles.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -133,6 +133,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/stereoDisparity/Makefile b/Samples/5_Domain_Specific/stereoDisparity/Makefile
index 4b2ae8d0..7608b56b 100644
--- a/Samples/5_Domain_Specific/stereoDisparity/Makefile
+++ b/Samples/5_Domain_Specific/stereoDisparity/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/stereoDisparity/NsightEclipse.xml b/Samples/5_Domain_Specific/stereoDisparity/NsightEclipse.xml
index e4c72e7f..a0249c74 100644
--- a/Samples/5_Domain_Specific/stereoDisparity/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/stereoDisparity/NsightEclipse.xml
@@ -6,16 +6,16 @@
     <clean>*.pgm</clean>
   </cleanextras>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaEventSynchronize</toolkit>
-    <toolkit>cudaCreateTextureObject</toolkit>
-    <toolkit>cudaMalloc</toolkit>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaEventSynchronize</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaCreateTextureObject</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[A CUDA program that demonstrates how to compute a stereo disparity map using SIMD SAD (Sum of Absolute Difference) intrinsics.  Requires Compute Capability 2.0 or higher.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -61,6 +61,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/5_Domain_Specific/stereoDisparity/README.md b/Samples/5_Domain_Specific/stereoDisparity/README.md
index 1034eb62..81b9eca4 100644
--- a/Samples/5_Domain_Specific/stereoDisparity/README.md
+++ b/Samples/5_Domain_Specific/stereoDisparity/README.md
@@ -10,7 +10,7 @@ Image Processing, Video Intrinsics
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaEventRecord, cudaEventCreate, cudaEventElapsedTime, cudaDeviceSynchronize, cudaEventSynchronize, cudaCreateTextureObject, cudaMalloc, cudaMemcpy, cudaGetDeviceProperties
+cudaMemcpy, cudaFree, cudaEventSynchronize, cudaDeviceSynchronize, cudaCreateTextureObject, cudaEventRecord, cudaMalloc, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/5_Domain_Specific/stereoDisparity/stereoDisparity_vs2017.vcxproj b/Samples/5_Domain_Specific/stereoDisparity/stereoDisparity_vs2017.vcxproj
index eb5a3d5b..9305e49d 100644
--- a/Samples/5_Domain_Specific/stereoDisparity/stereoDisparity_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/stereoDisparity/stereoDisparity_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/stereoDisparity.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/stereoDisparity/stereoDisparity_vs2019.vcxproj b/Samples/5_Domain_Specific/stereoDisparity/stereoDisparity_vs2019.vcxproj
index 1147bc9d..bea60c05 100644
--- a/Samples/5_Domain_Specific/stereoDisparity/stereoDisparity_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/stereoDisparity/stereoDisparity_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/stereoDisparity.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/stereoDisparity/stereoDisparity_vs2022.vcxproj b/Samples/5_Domain_Specific/stereoDisparity/stereoDisparity_vs2022.vcxproj
index 16e56415..0b22b48a 100644
--- a/Samples/5_Domain_Specific/stereoDisparity/stereoDisparity_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/stereoDisparity/stereoDisparity_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/stereoDisparity.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/volumeFiltering/Makefile b/Samples/5_Domain_Specific/volumeFiltering/Makefile
index dfcda5bb..438c552e 100644
--- a/Samples/5_Domain_Specific/volumeFiltering/Makefile
+++ b/Samples/5_Domain_Specific/volumeFiltering/Makefile
@@ -299,9 +299,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/volumeFiltering/NsightEclipse.xml b/Samples/5_Domain_Specific/volumeFiltering/NsightEclipse.xml
index 813926c5..7b54f46a 100644
--- a/Samples/5_Domain_Specific/volumeFiltering/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/volumeFiltering/NsightEclipse.xml
@@ -3,25 +3,25 @@
 <entry>
   <name>volumeFiltering</name>
   <cuda_api_list>
-    <toolkit>cudaGraphicsMapResources</toolkit>
-    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
     <toolkit>cudaMemcpy</toolkit>
-    <toolkit>cudaMemcpyToSymbol</toolkit>
+    <toolkit>cudaGraphicsMapResources</toolkit>
     <toolkit>cudaDestroySurfaceObject</toolkit>
-    <toolkit>cudaPitchedPtr</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaGraphicsUnregisterResource</toolkit>
+    <toolkit>cudaExtent</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaCreateSurfaceObject</toolkit>
+    <toolkit>cudaMemcpyToSymbol</toolkit>
+    <toolkit>cudaPitchedPtr</toolkit>
+    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
+    <toolkit>cudaCreateTextureObject</toolkit>
+    <toolkit>cudaGraphicsUnmapResources</toolkit>
+    <toolkit>cudaMallocArray</toolkit>
+    <toolkit>cudaFreeArray</toolkit>
     <toolkit>cudaDestroyTextureObject</toolkit>
     <toolkit>cudaMemset</toolkit>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaFreeArray</toolkit>
     <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
-    <toolkit>cudaExtent</toolkit>
-    <toolkit>cudaCreateSurfaceObject</toolkit>
-    <toolkit>cudaCreateTextureObject</toolkit>
-    <toolkit>cudaMallocArray</toolkit>
-    <toolkit>cudaGraphicsUnmapResources</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaGraphicsUnregisterResource</toolkit>
+    <toolkit>cudaMalloc</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates 3D Volumetric Filtering using 3D Textures and 3D Surface Writes.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -86,6 +86,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/5_Domain_Specific/volumeFiltering/README.md b/Samples/5_Domain_Specific/volumeFiltering/README.md
index 2476ecf5..a2bc76e3 100644
--- a/Samples/5_Domain_Specific/volumeFiltering/README.md
+++ b/Samples/5_Domain_Specific/volumeFiltering/README.md
@@ -10,7 +10,7 @@ Graphics Interop, Image Processing, 3D Textures, Surface Writes
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaGraphicsMapResources, cudaGraphicsResourceGetMappedPointer, cudaMemcpy, cudaMemcpyToSymbol, cudaDestroySurfaceObject, cudaPitchedPtr, cudaMalloc, cudaGraphicsUnregisterResource, cudaDeviceSynchronize, cudaDestroyTextureObject, cudaMemset, cudaFree, cudaFreeArray, cudaGraphicsGLRegisterBuffer, cudaExtent, cudaCreateSurfaceObject, cudaCreateTextureObject, cudaMallocArray, cudaGraphicsUnmapResources
+cudaMemcpy, cudaGraphicsMapResources, cudaDestroySurfaceObject, cudaExtent, cudaDeviceSynchronize, cudaCreateSurfaceObject, cudaMemcpyToSymbol, cudaPitchedPtr, cudaGraphicsResourceGetMappedPointer, cudaCreateTextureObject, cudaGraphicsUnmapResources, cudaMallocArray, cudaFreeArray, cudaDestroyTextureObject, cudaMemset, cudaGraphicsGLRegisterBuffer, cudaFree, cudaGraphicsUnregisterResource, cudaMalloc
 
 ## Dependencies needed to build/run
 [X11](../../../README.md#x11), [GL](../../../README.md#gl)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/volumeFiltering/findgllib.mk b/Samples/5_Domain_Specific/volumeFiltering/findgllib.mk
index f0a5c551..998fcf0f 100644
--- a/Samples/5_Domain_Specific/volumeFiltering/findgllib.mk
+++ b/Samples/5_Domain_Specific/volumeFiltering/findgllib.mk
@@ -53,11 +53,12 @@ endif
 ifeq ("$(TARGET_OS)","linux")
     # $(info) >> findgllib.mk -> LINUX path <<<)
     # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside
-    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu      >/dev/null 2>&1; echo $$?)
-    FEDORA = $(shell echo $(DISTRO) | grep -i fedora      >/dev/null 2>&1; echo $$?)
-    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
-    CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
+    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu       >/dev/null 2>&1; echo $$?)
+    FEDORA = $(shell echo $(DISTRO) | grep -i fedora       >/dev/null 2>&1; echo $$?)
+    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel'  >/dev/null 2>&1; echo $$?)
+    CENTOS = $(shell echo $(DISTRO) | grep -i centos       >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux")
         DFLT_PATH ?= /usr/lib
       endif
     endif
+
     ifeq ("$(SUSE)","0")
       GLPATH    ?= /usr/X11R6/lib64
       GLLINK    ?= -L/usr/X11R6/lib64
       DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(FEDORA)","0")
+    else 
       GLPATH    ?= /usr/lib64/nvidia
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
-    ifeq ("$(RHEL)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(CENTOS)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-  
+      
   # find libGL, libGLU 
   GLLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so  -print 2>/dev/null)
   GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null)
diff --git a/Samples/5_Domain_Specific/volumeFiltering/volumeFiltering_vs2017.vcxproj b/Samples/5_Domain_Specific/volumeFiltering/volumeFiltering_vs2017.vcxproj
index 71b27a63..6e068663 100644
--- a/Samples/5_Domain_Specific/volumeFiltering/volumeFiltering_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/volumeFiltering/volumeFiltering_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/volumeFiltering.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -122,6 +122,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/volumeFiltering/volumeFiltering_vs2019.vcxproj b/Samples/5_Domain_Specific/volumeFiltering/volumeFiltering_vs2019.vcxproj
index af2faa78..2a01aa1c 100644
--- a/Samples/5_Domain_Specific/volumeFiltering/volumeFiltering_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/volumeFiltering/volumeFiltering_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/volumeFiltering.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -118,6 +118,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/volumeFiltering/volumeFiltering_vs2022.vcxproj b/Samples/5_Domain_Specific/volumeFiltering/volumeFiltering_vs2022.vcxproj
index 3dc02a43..04608aae 100644
--- a/Samples/5_Domain_Specific/volumeFiltering/volumeFiltering_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/volumeFiltering/volumeFiltering_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/volumeFiltering.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -118,6 +118,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/volumeRender/Makefile b/Samples/5_Domain_Specific/volumeRender/Makefile
index b2a74d0d..fdb5649f 100644
--- a/Samples/5_Domain_Specific/volumeRender/Makefile
+++ b/Samples/5_Domain_Specific/volumeRender/Makefile
@@ -299,9 +299,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/volumeRender/NsightEclipse.xml b/Samples/5_Domain_Specific/volumeRender/NsightEclipse.xml
index bfef15a2..05b789d9 100644
--- a/Samples/5_Domain_Specific/volumeRender/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/volumeRender/NsightEclipse.xml
@@ -3,24 +3,24 @@
 <entry>
   <name>volumeRender</name>
   <cuda_api_list>
-    <toolkit>cudaMemcpyToSymbol</toolkit>
-    <toolkit>cudaMemset</toolkit>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaGraphicsMapResources</toolkit>
-    <toolkit>cudaFreeArray</toolkit>
-    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
-    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
-    <toolkit>cudaExtent</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaDestroyTextureObject</toolkit>
-    <toolkit>cudaPitchedPtr</toolkit>
-    <toolkit>cudaCreateTextureObject</toolkit>
-    <toolkit>cudaMalloc</toolkit>
     <toolkit>cudaProfilerStop</toolkit>
-    <toolkit>cudaMallocArray</toolkit>
-    <toolkit>cudaGraphicsUnregisterResource</toolkit>
     <toolkit>cudaGraphicsUnmapResources</toolkit>
     <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaMallocArray</toolkit>
+    <toolkit>cudaFreeArray</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaPitchedPtr</toolkit>
+    <toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
+    <toolkit>cudaGraphicsMapResources</toolkit>
+    <toolkit>cudaDestroyTextureObject</toolkit>
+    <toolkit>cudaExtent</toolkit>
+    <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaCreateTextureObject</toolkit>
+    <toolkit>cudaMemset</toolkit>
+    <toolkit>cudaMemcpyToSymbol</toolkit>
+    <toolkit>cudaGraphicsUnregisterResource</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates basic volume rendering using 3D Textures.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -84,6 +84,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/5_Domain_Specific/volumeRender/README.md b/Samples/5_Domain_Specific/volumeRender/README.md
index acc1fb2a..d5d6ef58 100644
--- a/Samples/5_Domain_Specific/volumeRender/README.md
+++ b/Samples/5_Domain_Specific/volumeRender/README.md
@@ -10,7 +10,7 @@ Graphics Interop, Image Processing, 3D Textures
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemcpyToSymbol, cudaMemset, cudaFree, cudaGraphicsMapResources, cudaFreeArray, cudaGraphicsGLRegisterBuffer, cudaGraphicsResourceGetMappedPointer, cudaExtent, cudaDeviceSynchronize, cudaDestroyTextureObject, cudaPitchedPtr, cudaCreateTextureObject, cudaMalloc, cudaProfilerStop, cudaMallocArray, cudaGraphicsUnregisterResource, cudaGraphicsUnmapResources, cudaMemcpy
+cudaProfilerStop, cudaGraphicsUnmapResources, cudaMemcpy, cudaMallocArray, cudaFreeArray, cudaFree, cudaPitchedPtr, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDestroyTextureObject, cudaExtent, cudaDeviceSynchronize, cudaCreateTextureObject, cudaMemset, cudaMemcpyToSymbol, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer
 
 ## Dependencies needed to build/run
 [X11](../../../README.md#x11), [GL](../../../README.md#gl)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/volumeRender/findgllib.mk b/Samples/5_Domain_Specific/volumeRender/findgllib.mk
index f0a5c551..998fcf0f 100644
--- a/Samples/5_Domain_Specific/volumeRender/findgllib.mk
+++ b/Samples/5_Domain_Specific/volumeRender/findgllib.mk
@@ -53,11 +53,12 @@ endif
 ifeq ("$(TARGET_OS)","linux")
     # $(info) >> findgllib.mk -> LINUX path <<<)
     # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside
-    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu      >/dev/null 2>&1; echo $$?)
-    FEDORA = $(shell echo $(DISTRO) | grep -i fedora      >/dev/null 2>&1; echo $$?)
-    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
-    CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
+    UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu       >/dev/null 2>&1; echo $$?)
+    FEDORA = $(shell echo $(DISTRO) | grep -i fedora       >/dev/null 2>&1; echo $$?)
+    RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel'  >/dev/null 2>&1; echo $$?)
+    CENTOS = $(shell echo $(DISTRO) | grep -i centos       >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -87,27 +88,17 @@ ifeq ("$(TARGET_OS)","linux")
         DFLT_PATH ?= /usr/lib
       endif
     endif
+
     ifeq ("$(SUSE)","0")
       GLPATH    ?= /usr/X11R6/lib64
       GLLINK    ?= -L/usr/X11R6/lib64
       DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(FEDORA)","0")
+    else 
       GLPATH    ?= /usr/lib64/nvidia
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
-    ifeq ("$(RHEL)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-    ifeq ("$(CENTOS)","0")
-      GLPATH    ?= /usr/lib64/nvidia
-      GLLINK    ?= -L/usr/lib64/nvidia
-      DFLT_PATH ?= /usr/lib64
-    endif
-  
+      
   # find libGL, libGLU 
   GLLIB  := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGL.so  -print 2>/dev/null)
   GLULIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLU.so -print 2>/dev/null)
diff --git a/Samples/5_Domain_Specific/volumeRender/volumeRender_vs2017.vcxproj b/Samples/5_Domain_Specific/volumeRender/volumeRender_vs2017.vcxproj
index 3e6388ca..ab6037e9 100644
--- a/Samples/5_Domain_Specific/volumeRender/volumeRender_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/volumeRender/volumeRender_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/volumeRender.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -118,6 +118,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/volumeRender/volumeRender_vs2019.vcxproj b/Samples/5_Domain_Specific/volumeRender/volumeRender_vs2019.vcxproj
index 44536db8..c752510d 100644
--- a/Samples/5_Domain_Specific/volumeRender/volumeRender_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/volumeRender/volumeRender_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/volumeRender.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -114,6 +114,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/volumeRender/volumeRender_vs2022.vcxproj b/Samples/5_Domain_Specific/volumeRender/volumeRender_vs2022.vcxproj
index 30b3f8fe..77698195 100644
--- a/Samples/5_Domain_Specific/volumeRender/volumeRender_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/volumeRender/volumeRender_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/volumeRender.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -114,6 +114,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/vulkanImageCUDA/Makefile b/Samples/5_Domain_Specific/vulkanImageCUDA/Makefile
index 7e592bbc..56b3696a 100644
--- a/Samples/5_Domain_Specific/vulkanImageCUDA/Makefile
+++ b/Samples/5_Domain_Specific/vulkanImageCUDA/Makefile
@@ -338,9 +338,9 @@ endif
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/5_Domain_Specific/vulkanImageCUDA/NsightEclipse.xml b/Samples/5_Domain_Specific/vulkanImageCUDA/NsightEclipse.xml
index 33025d3a..4fa867e9 100644
--- a/Samples/5_Domain_Specific/vulkanImageCUDA/NsightEclipse.xml
+++ b/Samples/5_Domain_Specific/vulkanImageCUDA/NsightEclipse.xml
@@ -6,33 +6,33 @@
     <flag>--std=c++11</flag>
   </cflags>
   <cuda_api_list>
-    <toolkit>cudaGetMipmappedArrayLevel</toolkit>
-    <toolkit>cudaImportExternalSemaphore</toolkit>
-    <toolkit>cudaExternalMemoryGetMappedMipmappedArray</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
-    <toolkit>cudaDestroyExternalMemory</toolkit>
-    <toolkit>cudaSignalExternalSemaphoresAsync</toolkit>
-    <toolkit>cudaFreeMipmappedArray</toolkit>
     <toolkit>cudaVkSemaphoreSignal</toolkit>
-    <toolkit>cudaVkImportImageMem</toolkit>
-    <toolkit>cudaDestroySurfaceObject</toolkit>
-    <toolkit>cudaImportExternalMemory</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaSetDevice</toolkit>
-    <toolkit>cudaGetDeviceProperties</toolkit>
-    <toolkit>cudaGetDeviceCount</toolkit>
-    <toolkit>cudaDestroyTextureObject</toolkit>
-    <toolkit>cudaUpdateVkImage</toolkit>
-    <toolkit>cudaDestroyExternalSemaphore</toolkit>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaStreamCreate</toolkit>
-    <toolkit>cudaVkSemaphoreWait</toolkit>
-    <toolkit>cudaExtent</toolkit>
-    <toolkit>cudaVkImportSemaphore</toolkit>
-    <toolkit>cudaCreateSurfaceObject</toolkit>
-    <toolkit>cudaMallocMipmappedArray</toolkit>
-    <toolkit>cudaCreateTextureObject</toolkit>
     <toolkit>cudaWaitExternalSemaphoresAsync</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaVkImportSemaphore</toolkit>
+    <toolkit>cudaImportExternalSemaphore</toolkit>
+    <toolkit>cudaGetMipmappedArrayLevel</toolkit>
+    <toolkit>cudaSetDevice</toolkit>
+    <toolkit>cudaDestroySurfaceObject</toolkit>
+    <toolkit>cudaExtent</toolkit>
+    <toolkit>cudaMallocMipmappedArray</toolkit>
+    <toolkit>cudaCreateSurfaceObject</toolkit>
+    <toolkit>cudaStreamCreate</toolkit>
+    <toolkit>cudaSignalExternalSemaphoresAsync</toolkit>
+    <toolkit>cudaUpdateVkImage</toolkit>
+    <toolkit>cudaCreateTextureObject</toolkit>
+    <toolkit>cudaGetDeviceCount</toolkit>
+    <toolkit>cudaExternalMemoryGetMappedMipmappedArray</toolkit>
+    <toolkit>cudaDestroyTextureObject</toolkit>
+    <toolkit>cudaVkImportImageMem</toolkit>
+    <toolkit>cudaDestroyExternalMemory</toolkit>
+    <toolkit>cudaVkSemaphoreWait</toolkit>
+    <toolkit>cudaImportExternalMemory</toolkit>
+    <toolkit>cudaDestroyExternalSemaphore</toolkit>
+    <toolkit>cudaFreeMipmappedArray</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates Vulkan Image - CUDA Interop. CUDA imports the Vulkan image buffer, performs box filtering over it, and synchronizes with Vulkan through vulkan semaphores imported by CUDA. This sample depends on Vulkan SDK, GLFW3 libraries, for building this sample please refer to "Build_instructions.txt" provided in this sample's directory]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -80,6 +80,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/5_Domain_Specific/vulkanImageCUDA/README.md b/Samples/5_Domain_Specific/vulkanImageCUDA/README.md
index 4b5e262a..97f2de7d 100644
--- a/Samples/5_Domain_Specific/vulkanImageCUDA/README.md
+++ b/Samples/5_Domain_Specific/vulkanImageCUDA/README.md
@@ -10,7 +10,7 @@ Graphics Interop, CUDA Vulkan Interop, Data Parallel Algorithms
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaGetMipmappedArrayLevel, cudaImportExternalSemaphore, cudaExternalMemoryGetMappedMipmappedArray, cudaMemcpy, cudaDestroyExternalMemory, cudaSignalExternalSemaphoresAsync, cudaFreeMipmappedArray, cudaVkSemaphoreSignal, cudaVkImportImageMem, cudaDestroySurfaceObject, cudaImportExternalMemory, cudaMalloc, cudaSetDevice, cudaGetDeviceProperties, cudaGetDeviceCount, cudaDestroyTextureObject, cudaUpdateVkImage, cudaDestroyExternalSemaphore, cudaFree, cudaStreamCreate, cudaVkSemaphoreWait, cudaExtent, cudaVkImportSemaphore, cudaCreateSurfaceObject, cudaMallocMipmappedArray, cudaCreateTextureObject, cudaWaitExternalSemaphoresAsync
+cudaVkSemaphoreSignal, cudaWaitExternalSemaphoresAsync, cudaMemcpy, cudaVkImportSemaphore, cudaImportExternalSemaphore, cudaGetMipmappedArrayLevel, cudaSetDevice, cudaDestroySurfaceObject, cudaExtent, cudaMallocMipmappedArray, cudaCreateSurfaceObject, cudaStreamCreate, cudaSignalExternalSemaphoresAsync, cudaUpdateVkImage, cudaCreateTextureObject, cudaGetDeviceCount, cudaExternalMemoryGetMappedMipmappedArray, cudaDestroyTextureObject, cudaVkImportImageMem, cudaDestroyExternalMemory, cudaVkSemaphoreWait, cudaImportExternalMemory, cudaDestroyExternalSemaphore, cudaFreeMipmappedArray, cudaFree, cudaMalloc, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [X11](../../../README.md#x11), [VULKAN](../../../README.md#vulkan)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/5_Domain_Specific/vulkanImageCUDA/findvulkan.mk b/Samples/5_Domain_Specific/vulkanImageCUDA/findvulkan.mk
index 4d4e8aed..004ab22b 100644
--- a/Samples/5_Domain_Specific/vulkanImageCUDA/findvulkan.mk
+++ b/Samples/5_Domain_Specific/vulkanImageCUDA/findvulkan.mk
@@ -57,6 +57,7 @@ ifeq ("$(TARGET_OS)","linux")
     RHEL   = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
     CENTOS = $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
     SUSE   = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
+    KYLIN  = $(shell echo $(DISTRO) | grep -i kylin        >/dev/null 2>&1; echo $$?)
     ifeq ("$(UBUNTU)","0")
       ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
         GLPATH := /usr/arm-linux-gnueabihf/lib
@@ -103,6 +104,11 @@ ifeq ("$(TARGET_OS)","linux")
       GLLINK    ?= -L/usr/lib64/nvidia
       DFLT_PATH ?= /usr/lib64
     endif
+    ifeq ("$(KYLIN)","0")
+      GLPATH    ?= /usr/lib64/nvidia
+      GLLINK    ?= -L/usr/lib64/nvidia
+      DFLT_PATH ?= /usr/lib64
+    endif
 
   VULKAN_SDK_PATH ?= ${VULKAN_SDK}
 
diff --git a/Samples/5_Domain_Specific/vulkanImageCUDA/vulkanImageCUDA_vs2017.vcxproj b/Samples/5_Domain_Specific/vulkanImageCUDA/vulkanImageCUDA_vs2017.vcxproj
index 93db012f..424170ab 100644
--- a/Samples/5_Domain_Specific/vulkanImageCUDA/vulkanImageCUDA_vs2017.vcxproj
+++ b/Samples/5_Domain_Specific/vulkanImageCUDA/vulkanImageCUDA_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/vulkanImageCUDA.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -117,6 +117,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/vulkanImageCUDA/vulkanImageCUDA_vs2019.vcxproj b/Samples/5_Domain_Specific/vulkanImageCUDA/vulkanImageCUDA_vs2019.vcxproj
index 632afdc2..769231d5 100644
--- a/Samples/5_Domain_Specific/vulkanImageCUDA/vulkanImageCUDA_vs2019.vcxproj
+++ b/Samples/5_Domain_Specific/vulkanImageCUDA/vulkanImageCUDA_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/vulkanImageCUDA.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -113,6 +113,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/5_Domain_Specific/vulkanImageCUDA/vulkanImageCUDA_vs2022.vcxproj b/Samples/5_Domain_Specific/vulkanImageCUDA/vulkanImageCUDA_vs2022.vcxproj
index 530a2f71..99726397 100644
--- a/Samples/5_Domain_Specific/vulkanImageCUDA/vulkanImageCUDA_vs2022.vcxproj
+++ b/Samples/5_Domain_Specific/vulkanImageCUDA/vulkanImageCUDA_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/vulkanImageCUDA.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -113,6 +113,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/6_Performance/UnifiedMemoryPerf/Makefile b/Samples/6_Performance/UnifiedMemoryPerf/Makefile
index 05eb45ee..2f1a6188 100644
--- a/Samples/6_Performance/UnifiedMemoryPerf/Makefile
+++ b/Samples/6_Performance/UnifiedMemoryPerf/Makefile
@@ -285,9 +285,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/6_Performance/UnifiedMemoryPerf/NsightEclipse.xml b/Samples/6_Performance/UnifiedMemoryPerf/NsightEclipse.xml
index a3b50f03..6217899d 100644
--- a/Samples/6_Performance/UnifiedMemoryPerf/NsightEclipse.xml
+++ b/Samples/6_Performance/UnifiedMemoryPerf/NsightEclipse.xml
@@ -3,19 +3,19 @@
 <entry>
   <name>UnifiedMemoryPerf</name>
   <cuda_api_list>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaStreamDestroy</toolkit>
+    <toolkit>cudaMemPrefetchAsync</toolkit>
     <toolkit>cudaFree</toolkit>
     <toolkit>cudaMallocHost</toolkit>
     <toolkit>cudaMallocManaged</toolkit>
-    <toolkit>cudaMemPrefetchAsync</toolkit>
-    <toolkit>cudaStreamCreate</toolkit>
     <toolkit>cudaStreamAttachMemAsync</toolkit>
+    <toolkit>cudaHostGetDevicePointer</toolkit>
     <toolkit>cudaFreeHost</toolkit>
+    <toolkit>cudaStreamSynchronize</toolkit>
     <toolkit>cudaMalloc</toolkit>
     <toolkit>cudaMemcpyAsync</toolkit>
-    <toolkit>cudaStreamSynchronize</toolkit>
-    <toolkit>cudaHostGetDevicePointer</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
+    <toolkit>cudaStreamCreate</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates the performance comparision using matrix multiplication kernel of Unified Memory with/without hints and other types of memory like zero copy buffers, pageable, pagelocked memory performing synchronous and Asynchronous transfers on a single GPU.]]></description>
@@ -66,6 +66,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/6_Performance/UnifiedMemoryPerf/README.md b/Samples/6_Performance/UnifiedMemoryPerf/README.md
index 575b7ee1..6c1ce62c 100644
--- a/Samples/6_Performance/UnifiedMemoryPerf/README.md
+++ b/Samples/6_Performance/UnifiedMemoryPerf/README.md
@@ -10,7 +10,7 @@ CUDA Systems Integration, Unified Memory, CUDA Streams and Events, Pinned System
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l, aarch64
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaStreamDestroy, cudaFree, cudaMallocHost, cudaMallocManaged, cudaMemPrefetchAsync, cudaStreamCreate, cudaStreamAttachMemAsync, cudaFreeHost, cudaMalloc, cudaMemcpyAsync, cudaStreamSynchronize, cudaHostGetDevicePointer, cudaMemcpy, cudaGetDeviceProperties
+cudaMemcpy, cudaStreamDestroy, cudaMemPrefetchAsync, cudaFree, cudaMallocHost, cudaMallocManaged, cudaStreamAttachMemAsync, cudaHostGetDevicePointer, cudaFreeHost, cudaStreamSynchronize, cudaMalloc, cudaMemcpyAsync, cudaStreamCreate, cudaGetDeviceProperties
 
 ## Dependencies needed to build/run
 [UVM](../../../README.md#uvm)
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/6_Performance/UnifiedMemoryPerf/UnifiedMemoryPerf_vs2017.vcxproj b/Samples/6_Performance/UnifiedMemoryPerf/UnifiedMemoryPerf_vs2017.vcxproj
index 5b99766a..7d59c0ae 100644
--- a/Samples/6_Performance/UnifiedMemoryPerf/UnifiedMemoryPerf_vs2017.vcxproj
+++ b/Samples/6_Performance/UnifiedMemoryPerf/UnifiedMemoryPerf_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/UnifiedMemoryPerf.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -110,6 +110,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/6_Performance/UnifiedMemoryPerf/UnifiedMemoryPerf_vs2019.vcxproj b/Samples/6_Performance/UnifiedMemoryPerf/UnifiedMemoryPerf_vs2019.vcxproj
index 823351a1..9b5e3657 100644
--- a/Samples/6_Performance/UnifiedMemoryPerf/UnifiedMemoryPerf_vs2019.vcxproj
+++ b/Samples/6_Performance/UnifiedMemoryPerf/UnifiedMemoryPerf_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/UnifiedMemoryPerf.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -106,6 +106,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/6_Performance/UnifiedMemoryPerf/UnifiedMemoryPerf_vs2022.vcxproj b/Samples/6_Performance/UnifiedMemoryPerf/UnifiedMemoryPerf_vs2022.vcxproj
index 7dfac552..536d6d5b 100644
--- a/Samples/6_Performance/UnifiedMemoryPerf/UnifiedMemoryPerf_vs2022.vcxproj
+++ b/Samples/6_Performance/UnifiedMemoryPerf/UnifiedMemoryPerf_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/UnifiedMemoryPerf.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -106,6 +106,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/6_Performance/alignedTypes/Makefile b/Samples/6_Performance/alignedTypes/Makefile
index 907e3f44..492ad197 100644
--- a/Samples/6_Performance/alignedTypes/Makefile
+++ b/Samples/6_Performance/alignedTypes/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/6_Performance/alignedTypes/NsightEclipse.xml b/Samples/6_Performance/alignedTypes/NsightEclipse.xml
index 388b2897..0b1d16af 100644
--- a/Samples/6_Performance/alignedTypes/NsightEclipse.xml
+++ b/Samples/6_Performance/alignedTypes/NsightEclipse.xml
@@ -3,11 +3,11 @@
 <entry>
   <name>alignedTypes</name>
   <cuda_api_list>
-    <toolkit>cudaMemset</toolkit>
+    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaFree</toolkit>
     <toolkit>cudaDeviceSynchronize</toolkit>
+    <toolkit>cudaMemset</toolkit>
     <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaMemcpy</toolkit>
     <toolkit>cudaGetDeviceProperties</toolkit>
   </cuda_api_list>
   <description><![CDATA[A simple test, showing huge access speed gap between aligned and misaligned structures. It measures per-element copy throughput for aligned and misaligned structures on big chunks of data.]]></description>
@@ -46,6 +46,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/6_Performance/alignedTypes/README.md b/Samples/6_Performance/alignedTypes/README.md
index 85c8bd55..56e5e265 100644
--- a/Samples/6_Performance/alignedTypes/README.md
+++ b/Samples/6_Performance/alignedTypes/README.md
@@ -10,7 +10,7 @@ Performance Strategies
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaMemset, cudaFree, cudaDeviceSynchronize, cudaMalloc, cudaMemcpy, cudaGetDeviceProperties
+cudaMemcpy, cudaFree, cudaDeviceSynchronize, cudaMemset, cudaMalloc, cudaGetDeviceProperties
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/6_Performance/alignedTypes/alignedTypes_vs2017.vcxproj b/Samples/6_Performance/alignedTypes/alignedTypes_vs2017.vcxproj
index 73e89699..7e3439f6 100644
--- a/Samples/6_Performance/alignedTypes/alignedTypes_vs2017.vcxproj
+++ b/Samples/6_Performance/alignedTypes/alignedTypes_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/alignedTypes.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/6_Performance/alignedTypes/alignedTypes_vs2019.vcxproj b/Samples/6_Performance/alignedTypes/alignedTypes_vs2019.vcxproj
index da732d28..833b531c 100644
--- a/Samples/6_Performance/alignedTypes/alignedTypes_vs2019.vcxproj
+++ b/Samples/6_Performance/alignedTypes/alignedTypes_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/alignedTypes.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/6_Performance/alignedTypes/alignedTypes_vs2022.vcxproj b/Samples/6_Performance/alignedTypes/alignedTypes_vs2022.vcxproj
index d24f2749..ea911036 100644
--- a/Samples/6_Performance/alignedTypes/alignedTypes_vs2022.vcxproj
+++ b/Samples/6_Performance/alignedTypes/alignedTypes_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/alignedTypes.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/6_Performance/transpose/Makefile b/Samples/6_Performance/transpose/Makefile
index 50355e56..83909bce 100644
--- a/Samples/6_Performance/transpose/Makefile
+++ b/Samples/6_Performance/transpose/Makefile
@@ -279,9 +279,9 @@ LIBRARIES :=
 
 # Gencode arguments
 ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87
+SMS ?= 53 61 70 72 75 80 86 87 90
 else
-SMS ?= 35 37 50 52 60 61 70 75 80 86
+SMS ?= 35 37 50 52 60 61 70 75 80 86 90
 endif
 
 ifeq ($(SMS),)
diff --git a/Samples/6_Performance/transpose/NsightEclipse.xml b/Samples/6_Performance/transpose/NsightEclipse.xml
index a080824a..58f448f6 100644
--- a/Samples/6_Performance/transpose/NsightEclipse.xml
+++ b/Samples/6_Performance/transpose/NsightEclipse.xml
@@ -3,17 +3,17 @@
 <entry>
   <name>transpose</name>
   <cuda_api_list>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaEventRecord</toolkit>
-    <toolkit>cudaEventCreate</toolkit>
-    <toolkit>cudaEventElapsedTime</toolkit>
-    <toolkit>cudaEventSynchronize</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaEventDestroy</toolkit>
-    <toolkit>cudaGetLastError</toolkit>
     <toolkit>cudaMemcpy</toolkit>
-    <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaMalloc</toolkit>
+    <toolkit>cudaFree</toolkit>
+    <toolkit>cudaGetLastError</toolkit>
+    <toolkit>cudaEventSynchronize</toolkit>
+    <toolkit>cudaEventRecord</toolkit>
     <toolkit>cudaGetDevice</toolkit>
+    <toolkit>cudaEventDestroy</toolkit>
+    <toolkit>cudaEventElapsedTime</toolkit>
+    <toolkit>cudaGetDeviceProperties</toolkit>
+    <toolkit>cudaEventCreate</toolkit>
   </cuda_api_list>
   <description><![CDATA[This sample demonstrates Matrix Transpose.  Different performance are shown to achieve high performance.]]></description>
   <devicecompilation>whole</devicecompilation>
@@ -55,6 +55,7 @@
   <sm-arch>sm80</sm-arch>
   <sm-arch>sm86</sm-arch>
   <sm-arch>sm87</sm-arch>
+  <sm-arch>sm90</sm-arch>
   <supported_envs>
     <env>
       <arch>x86_64</arch>
diff --git a/Samples/6_Performance/transpose/README.md b/Samples/6_Performance/transpose/README.md
index 2d787068..0693888b 100644
--- a/Samples/6_Performance/transpose/README.md
+++ b/Samples/6_Performance/transpose/README.md
@@ -10,7 +10,7 @@ Performance Strategies, Linear Algebra
 
 ## Supported SM Architectures
 
-[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
 
 ## Supported OSes
 
@@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
 ## CUDA APIs involved
 
 ### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaFree, cudaEventRecord, cudaEventCreate, cudaEventElapsedTime, cudaEventSynchronize, cudaMalloc, cudaEventDestroy, cudaGetLastError, cudaMemcpy, cudaGetDeviceProperties, cudaGetDevice
+cudaMemcpy, cudaMalloc, cudaFree, cudaGetLastError, cudaEventSynchronize, cudaEventRecord, cudaGetDevice, cudaEventDestroy, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/6_Performance/transpose/transpose_vs2017.vcxproj b/Samples/6_Performance/transpose/transpose_vs2017.vcxproj
index 0c04b15f..a9f215af 100644
--- a/Samples/6_Performance/transpose/transpose_vs2017.vcxproj
+++ b/Samples/6_Performance/transpose/transpose_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -67,7 +67,7 @@
       <OutputFile>$(OutDir)/transpose.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/6_Performance/transpose/transpose_vs2019.vcxproj b/Samples/6_Performance/transpose/transpose_vs2019.vcxproj
index 6cf4cb22..e4721874 100644
--- a/Samples/6_Performance/transpose/transpose_vs2019.vcxproj
+++ b/Samples/6_Performance/transpose/transpose_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/transpose.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/6_Performance/transpose/transpose_vs2022.vcxproj b/Samples/6_Performance/transpose/transpose_vs2022.vcxproj
index 3c7e497c..1db2b8cd 100644
--- a/Samples/6_Performance/transpose/transpose_vs2022.vcxproj
+++ b/Samples/6_Performance/transpose/transpose_vs2022.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -63,7 +63,7 @@
       <OutputFile>$(OutDir)/transpose.exe</OutputFile>
     </Link>
     <CudaCompile>
-      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
       <AdditionalOptions>-Xcompiler "/wd 4819"  --threads 0 </AdditionalOptions>
       <Include>./;../../../Common</Include>
       <Defines>WIN32</Defines>
@@ -103,6 +103,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
   </ImportGroup>
 </Project>