From e950012e722c4cb583f4ae7ce01d07a78d5e5e60 Mon Sep 17 00:00:00 2001
From: Rutwik Choughule <rutwik.choughule@gmail.com>
Date: Wed, 30 Jun 2021 11:26:41 +0530
Subject: [PATCH] add and update samples with CUDA 11.4 support

---
 README.md                                     |  66 +-
 Samples/EGLStream_CUDA_Interop/Makefile       |   6 -
 Samples/EGLStream_CUDA_Interop/README.md      |   2 +-
 Samples/MersenneTwisterGP11213/Makefile       |  29 +-
 .../MersenneTwisterGP11213_vs2017.vcxproj     |   4 +-
 .../MersenneTwisterGP11213_vs2019.vcxproj     |   4 +-
 Samples/MersenneTwisterGP11213/README.md      |   2 +-
 .../NV12toBGRandResize_vs2017.vcxproj         |   4 +-
 .../NV12toBGRandResize_vs2019.vcxproj         |   4 +-
 Samples/NV12toBGRandResize/README.md          |   2 +-
 Samples/UnifiedMemoryPerf/README.md           |   2 +-
 .../UnifiedMemoryPerf_vs2017.vcxproj          |   4 +-
 .../UnifiedMemoryPerf_vs2019.vcxproj          |   4 +-
 Samples/bandwidthTest/README.md               |   2 +-
 .../bandwidthTest_vs2017.vcxproj              |   4 +-
 .../bandwidthTest_vs2019.vcxproj              |   4 +-
 .../Makefile                                  |   6 -
 .../README.md                                 |   2 +-
 ...rkersAndLabelCompressionNPP_vs2017.vcxproj |   4 +-
 ...rkersAndLabelCompressionNPP_vs2019.vcxproj |   4 +-
 Samples/bf16TensorCoreGemm/README.md          |   2 +-
 .../bf16TensorCoreGemm_vs2017.vcxproj         |   4 +-
 .../bf16TensorCoreGemm_vs2019.vcxproj         |   4 +-
 Samples/binaryPartitionCG/README.md           |   2 +-
 .../binaryPartitionCG_vs2017.vcxproj          |   4 +-
 .../binaryPartitionCG_vs2019.vcxproj          |   4 +-
 Samples/boxFilterNPP/README.md                |   2 +-
 .../boxFilterNPP/boxFilterNPP_vs2017.vcxproj  |   4 +-
 .../boxFilterNPP/boxFilterNPP_vs2019.vcxproj  |   4 +-
 Samples/cannyEdgeDetectorNPP/README.md        |   2 +-
 .../cannyEdgeDetectorNPP_vs2017.vcxproj       |   4 +-
 .../cannyEdgeDetectorNPP_vs2019.vcxproj       |   4 +-
 Samples/cdpQuadtree/Makefile                  | 370 +++++++++
 Samples/cdpQuadtree/NsightEclipse.xml         |  72 ++
 Samples/cdpQuadtree/README.md                 |  71 ++
 Samples/cdpQuadtree/cdpQuadtree.cu            | 742 ++++++++++++++++++
 Samples/cdpQuadtree/cdpQuadtree_vs2017.sln    |  20 +
 .../cdpQuadtree/cdpQuadtree_vs2017.vcxproj    | 114 +++
 Samples/cdpQuadtree/cdpQuadtree_vs2019.sln    |  20 +
 .../cdpQuadtree/cdpQuadtree_vs2019.vcxproj    | 110 +++
 Samples/concurrentKernels/README.md           |   2 +-
 .../concurrentKernels_vs2017.vcxproj          |   4 +-
 .../concurrentKernels_vs2019.vcxproj          |   4 +-
 Samples/conjugateGradientCudaGraphs/Makefile  |   6 -
 Samples/conjugateGradientCudaGraphs/README.md |   2 +-
 ...conjugateGradientCudaGraphs_vs2017.vcxproj |   4 +-
 ...conjugateGradientCudaGraphs_vs2019.vcxproj |   4 +-
 .../conjugateGradientMultiBlockCG/README.md   |   2 +-
 ...njugateGradientMultiBlockCG_vs2017.vcxproj |   4 +-
 ...njugateGradientMultiBlockCG_vs2019.vcxproj |   4 +-
 .../conjugateGradientMultiDeviceCG/README.md  |   2 +-
 ...jugateGradientMultiDeviceCG_vs2017.vcxproj |   4 +-
 ...jugateGradientMultiDeviceCG_vs2019.vcxproj |   4 +-
 Samples/cuSolverDn_LinearSolver/Makefile      |   6 -
 Samples/cuSolverDn_LinearSolver/README.md     |   2 +-
 .../cuSolverDn_LinearSolver_vs2017.vcxproj    |   4 +-
 .../cuSolverDn_LinearSolver_vs2019.vcxproj    |   4 +-
 Samples/cuSolverSp_LinearSolver/Makefile      |   6 -
 Samples/cuSolverSp_LinearSolver/README.md     |   2 +-
 .../cuSolverSp_LinearSolver_vs2017.vcxproj    |   4 +-
 .../cuSolverSp_LinearSolver_vs2019.vcxproj    |   4 +-
 Samples/cudaCompressibleMemory/README.md      |   2 +-
 .../cudaCompressibleMemory_vs2017.vcxproj     |   4 +-
 .../cudaCompressibleMemory_vs2019.vcxproj     |   4 +-
 Samples/cudaNvSci/Makefile                    |   6 -
 Samples/cudaNvSci/README.md                   |   2 +-
 Samples/cudaNvSciNvMedia/README.md            |   2 +-
 Samples/cudaOpenMP/README.md                  |   2 +-
 Samples/cudaOpenMP/cudaOpenMP_vs2017.vcxproj  |   4 +-
 Samples/cudaOpenMP/cudaOpenMP_vs2019.vcxproj  |   4 +-
 Samples/cudaTensorCoreGemm/README.md          |   2 +-
 .../cudaTensorCoreGemm_vs2017.vcxproj         |   4 +-
 .../cudaTensorCoreGemm_vs2019.vcxproj         |   4 +-
 Samples/deviceQuery/README.md                 |   2 +-
 .../deviceQuery/deviceQuery_vs2017.vcxproj    |   4 +-
 .../deviceQuery/deviceQuery_vs2019.vcxproj    |   4 +-
 Samples/dmmaTensorCoreGemm/README.md          |   2 +-
 .../dmmaTensorCoreGemm_vs2017.vcxproj         |   4 +-
 .../dmmaTensorCoreGemm_vs2019.vcxproj         |   4 +-
 Samples/globalToShmemAsyncCopy/README.md      |   2 +-
 .../globalToShmemAsyncCopy_vs2017.vcxproj     |   4 +-
 .../globalToShmemAsyncCopy_vs2019.vcxproj     |   4 +-
 Samples/immaTensorCoreGemm/README.md          |   2 +-
 .../immaTensorCoreGemm_vs2017.vcxproj         |   4 +-
 .../immaTensorCoreGemm_vs2019.vcxproj         |   4 +-
 Samples/jacobiCudaGraphs/README.md            |   2 +-
 .../jacobiCudaGraphs_vs2017.vcxproj           |   4 +-
 .../jacobiCudaGraphs_vs2019.vcxproj           |   4 +-
 Samples/matrixMul/README.md                   |   2 +-
 Samples/matrixMul/matrixMul_vs2017.vcxproj    |   4 +-
 Samples/matrixMul/matrixMul_vs2019.vcxproj    |   4 +-
 Samples/matrixMulDrv/README.md                |   2 +-
 .../matrixMulDrv/matrixMulDrv_vs2017.vcxproj  |   4 +-
 .../matrixMulDrv/matrixMulDrv_vs2019.vcxproj  |   4 +-
 Samples/memMapIPCDrv/README.md                |   2 +-
 .../memMapIPCDrv/memMapIPCDrv_vs2017.vcxproj  |   4 +-
 .../memMapIPCDrv/memMapIPCDrv_vs2019.vcxproj  |   4 +-
 Samples/nvJPEG/Makefile                       |   6 -
 Samples/nvJPEG/README.md                      |   2 +-
 Samples/nvJPEG/nvJPEG_vs2017.vcxproj          |   4 +-
 Samples/nvJPEG/nvJPEG_vs2019.vcxproj          |   4 +-
 Samples/nvJPEG_encoder/Makefile               |   6 -
 Samples/nvJPEG_encoder/README.md              |   2 +-
 .../nvJPEG_encoder_vs2017.vcxproj             |   4 +-
 .../nvJPEG_encoder_vs2019.vcxproj             |   4 +-
 Samples/p2pBandwidthLatencyTest/README.md     |   2 +-
 .../p2pBandwidthLatencyTest_vs2017.vcxproj    |   4 +-
 .../p2pBandwidthLatencyTest_vs2019.vcxproj    |   4 +-
 Samples/reduction/README.md                   |   2 +-
 Samples/reduction/reduction_vs2017.vcxproj    |   4 +-
 Samples/reduction/reduction_vs2019.vcxproj    |   4 +-
 Samples/shfl_scan/README.md                   |   2 +-
 Samples/shfl_scan/shfl_scan_vs2017.vcxproj    |   4 +-
 Samples/shfl_scan/shfl_scan_vs2019.vcxproj    |   4 +-
 Samples/simpleAWBarrier/README.md             |   2 +-
 .../simpleAWBarrier_vs2017.vcxproj            |   4 +-
 .../simpleAWBarrier_vs2019.vcxproj            |   4 +-
 Samples/simpleAttributes/README.md            |   2 +-
 .../simpleAttributes_vs2017.vcxproj           |   4 +-
 .../simpleAttributes_vs2019.vcxproj           |   4 +-
 Samples/simpleCUBLAS/Makefile                 |  29 +-
 Samples/simpleCUBLAS/README.md                |   2 +-
 .../simpleCUBLAS/simpleCUBLAS_vs2017.vcxproj  |   4 +-
 .../simpleCUBLAS/simpleCUBLAS_vs2019.vcxproj  |   4 +-
 Samples/simpleCUBLASXT/Makefile               |   6 -
 Samples/simpleCUBLASXT/README.md              |   2 +-
 .../simpleCUBLASXT_vs2017.vcxproj             |   4 +-
 .../simpleCUBLASXT_vs2019.vcxproj             |   4 +-
 Samples/simpleCUBLAS_LU/Makefile              |   6 -
 Samples/simpleCUBLAS_LU/README.md             |   2 +-
 .../simpleCUBLAS_LU_vs2017.vcxproj            |   4 +-
 .../simpleCUBLAS_LU_vs2019.vcxproj            |   4 +-
 Samples/simpleCUFFT/Makefile                  |   6 -
 Samples/simpleCUFFT/README.md                 |   2 +-
 .../simpleCUFFT/simpleCUFFT_vs2017.vcxproj    |   4 +-
 .../simpleCUFFT/simpleCUFFT_vs2019.vcxproj    |   4 +-
 Samples/simpleCudaGraphs/README.md            |   2 +-
 .../simpleCudaGraphs_vs2017.vcxproj           |   4 +-
 .../simpleCudaGraphs_vs2019.vcxproj           |   4 +-
 Samples/simpleD3D11/README.md                 |   2 +-
 .../simpleD3D11/simpleD3D11_vs2017.vcxproj    |   4 +-
 .../simpleD3D11/simpleD3D11_vs2019.vcxproj    |   4 +-
 Samples/simpleD3D12/README.md                 |   2 +-
 .../simpleD3D12/simpleD3D12_vs2017.vcxproj    |   4 +-
 .../simpleD3D12/simpleD3D12_vs2019.vcxproj    |   4 +-
 Samples/simpleDrvRuntime/README.md            |   2 +-
 .../simpleDrvRuntime_vs2017.vcxproj           |   4 +-
 .../simpleDrvRuntime_vs2019.vcxproj           |   4 +-
 Samples/simpleGL/README.md                    |   2 +-
 Samples/simpleGL/simpleGL_vs2017.vcxproj      |   4 +-
 Samples/simpleGL/simpleGL_vs2019.vcxproj      |   4 +-
 Samples/simpleIPC/README.md                   |   2 +-
 Samples/simpleIPC/simpleIPC_vs2017.vcxproj    |   4 +-
 Samples/simpleIPC/simpleIPC_vs2019.vcxproj    |   4 +-
 Samples/simpleVoteIntrinsics/README.md        |   2 +-
 .../simpleVoteIntrinsics_vs2017.vcxproj       |   4 +-
 .../simpleVoteIntrinsics_vs2019.vcxproj       |   4 +-
 Samples/simpleVulkan/README.md                |   2 +-
 .../simpleVulkan/simpleVulkan_vs2017.vcxproj  |   4 +-
 .../simpleVulkan/simpleVulkan_vs2019.vcxproj  |   4 +-
 Samples/simpleVulkanMMAP/README.md            |   2 +-
 .../simpleVulkanMMAP_vs2017.vcxproj           |   4 +-
 .../simpleVulkanMMAP_vs2019.vcxproj           |   4 +-
 Samples/simpleZeroCopy/README.md              |   2 +-
 .../simpleZeroCopy_vs2017.vcxproj             |   4 +-
 .../simpleZeroCopy_vs2019.vcxproj             |   4 +-
 Samples/streamOrderedAllocation/README.md     |   2 +-
 .../streamOrderedAllocation_vs2017.vcxproj    |   4 +-
 .../streamOrderedAllocation_vs2019.vcxproj    |   4 +-
 Samples/streamOrderedAllocationIPC/README.md  |   2 +-
 Samples/streamOrderedAllocationP2P/README.md  |   2 +-
 .../streamOrderedAllocationP2P_vs2017.vcxproj |   4 +-
 .../streamOrderedAllocationP2P_vs2019.vcxproj |   4 +-
 Samples/systemWideAtomics/README.md           |   2 +-
 Samples/tf32TensorCoreGemm/README.md          |   2 +-
 .../tf32TensorCoreGemm_vs2017.vcxproj         |   4 +-
 .../tf32TensorCoreGemm_vs2019.vcxproj         |   4 +-
 Samples/vectorAddMMAP/README.md               |   2 +-
 .../vectorAddMMAP_vs2017.vcxproj              |   4 +-
 .../vectorAddMMAP_vs2019.vcxproj              |   4 +-
 Samples/vectorAdd_nvrtc/README.md             |   2 +-
 .../vectorAdd_nvrtc_vs2017.vcxproj            |   4 +-
 .../vectorAdd_nvrtc_vs2019.vcxproj            |   4 +-
 Samples/vulkanImageCUDA/README.md             |   2 +-
 .../vulkanImageCUDA_vs2017.vcxproj            |   4 +-
 .../vulkanImageCUDA_vs2019.vcxproj            |   4 +-
 Samples/warpAggregatedAtomicsCG/README.md     |   2 +-
 .../warpAggregatedAtomicsCG_vs2017.vcxproj    |   4 +-
 .../warpAggregatedAtomicsCG_vs2019.vcxproj    |   4 +-
 Samples/watershedSegmentationNPP/Makefile     |   6 -
 Samples/watershedSegmentationNPP/README.md    |   2 +-
 .../watershedSegmentationNPP_vs2017.vcxproj   |   4 +-
 .../watershedSegmentationNPP_vs2019.vcxproj   |   4 +-
 193 files changed, 1844 insertions(+), 431 deletions(-)
 create mode 100644 Samples/cdpQuadtree/Makefile
 create mode 100644 Samples/cdpQuadtree/NsightEclipse.xml
 create mode 100644 Samples/cdpQuadtree/README.md
 create mode 100644 Samples/cdpQuadtree/cdpQuadtree.cu
 create mode 100644 Samples/cdpQuadtree/cdpQuadtree_vs2017.sln
 create mode 100644 Samples/cdpQuadtree/cdpQuadtree_vs2017.vcxproj
 create mode 100644 Samples/cdpQuadtree/cdpQuadtree_vs2019.sln
 create mode 100644 Samples/cdpQuadtree/cdpQuadtree_vs2019.vcxproj

diff --git a/README.md b/README.md
index a4fafa7f..5efaefc0 100644
--- a/README.md
+++ b/README.md
@@ -1,11 +1,15 @@
 # CUDA Samples
 
-Samples for CUDA Developers which demonstrates features in CUDA Toolkit. This version supports [CUDA Toolkit 11.3](https://developer.nvidia.com/cuda-downloads).
+Samples for CUDA Developers which demonstrate features in CUDA Toolkit. This version supports [CUDA Toolkit 11.4](https://developer.nvidia.com/cuda-downloads).
 
 ## Release Notes
 
 This section describes the release notes for the CUDA Samples on GitHub only.
 
+### CUDA 11.4
+* Added `cdpQuadtree`. Demonstrates a quadtree implementation using CUDA Dynamic Parallelism.
+* Updated `simpleVulkan`, `simpleVulkanMMAP` and `vulkanImageCUDA`. Demonstrates use of SPIR-V shaders.
+
 ### CUDA 11.3
 *  Added `streamOrderedAllocationIPC`. Demonstrates Inter Process Communication using one process per GPU for computation.
 *  Added `simpleCUBLAS_LU`. Demonstrates batched matrix LU decomposition using cuBLAS API `cublas<t>getrfBatched()`
@@ -109,7 +113,7 @@ This is the first release of CUDA Samples on GitHub:
 
 ### Prerequisites
 
-Download and install the [CUDA Toolkit 11.3](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.4](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 For system requirements and installation instructions of cuda toolkit, please refer to the [Linux Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/), and the [Windows Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-microsoft-windows/index.html).
 
 ### Getting the CUDA Samples
@@ -166,39 +170,39 @@ The samples makefiles can take advantage of certain options:
 ### Samples by OS
 
 #### Linux
-**[bandwidthTest](./Samples/bandwidthTest)** | **[batchedLabelMarkersAndLabelCompressionNPP](./Samples/batchedLabelMarkersAndLabelCompressionNPP)** | **[bf16TensorCoreGemm](./Samples/bf16TensorCoreGemm)** | **[binaryPartitionCG](./Samples/binaryPartitionCG)** |
+**[vectorAdd_nvrtc](./Samples/vectorAdd_nvrtc)** | **[shfl_scan](./Samples/shfl_scan)** | **[binaryPartitionCG](./Samples/binaryPartitionCG)** | **[concurrentKernels](./Samples/concurrentKernels)** |
 ---|---|---|---|
-**[boxFilterNPP](./Samples/boxFilterNPP)** | **[cannyEdgeDetectorNPP](./Samples/cannyEdgeDetectorNPP)** | **[concurrentKernels](./Samples/concurrentKernels)** | **[conjugateGradientCudaGraphs](./Samples/conjugateGradientCudaGraphs)** |
-**[conjugateGradientMultiBlockCG](./Samples/conjugateGradientMultiBlockCG)** | **[conjugateGradientMultiDeviceCG](./Samples/conjugateGradientMultiDeviceCG)** | **[cudaCompressibleMemory](./Samples/cudaCompressibleMemory)** | **[cudaNvSci](./Samples/cudaNvSci)** |
-**[cudaNvSciNvMedia](./Samples/cudaNvSciNvMedia)** | **[cudaOpenMP](./Samples/cudaOpenMP)** | **[cudaTensorCoreGemm](./Samples/cudaTensorCoreGemm)** | **[cuSolverDn_LinearSolver](./Samples/cuSolverDn_LinearSolver)** |
-**[cuSolverSp_LinearSolver](./Samples/cuSolverSp_LinearSolver)** | **[deviceQuery](./Samples/deviceQuery)** | **[dmmaTensorCoreGemm](./Samples/dmmaTensorCoreGemm)** | **[EGLStream_CUDA_Interop](./Samples/EGLStream_CUDA_Interop)** |
-**[globalToShmemAsyncCopy](./Samples/globalToShmemAsyncCopy)** | **[immaTensorCoreGemm](./Samples/immaTensorCoreGemm)** | **[jacobiCudaGraphs](./Samples/jacobiCudaGraphs)** | **[matrixMul](./Samples/matrixMul)** |
-**[matrixMulDrv](./Samples/matrixMulDrv)** | **[memMapIPCDrv](./Samples/memMapIPCDrv)** | **[MersenneTwisterGP11213](./Samples/MersenneTwisterGP11213)** | **[NV12toBGRandResize](./Samples/NV12toBGRandResize)** |
-**[nvJPEG](./Samples/nvJPEG)** | **[nvJPEG_encoder](./Samples/nvJPEG_encoder)** | **[p2pBandwidthLatencyTest](./Samples/p2pBandwidthLatencyTest)** | **[reduction](./Samples/reduction)** |
-**[shfl_scan](./Samples/shfl_scan)** | **[simpleAttributes](./Samples/simpleAttributes)** | **[simpleAWBarrier](./Samples/simpleAWBarrier)** | **[simpleCUBLAS](./Samples/simpleCUBLAS)** |
-**[simpleCUBLASXT](./Samples/simpleCUBLASXT)** | **[simpleCUBLAS_LU](./Samples/simpleCUBLAS_LU)** | **[simpleCudaGraphs](./Samples/simpleCudaGraphs)** | **[simpleCUFFT](./Samples/simpleCUFFT)** |
-**[simpleDrvRuntime](./Samples/simpleDrvRuntime)** | **[simpleGL](./Samples/simpleGL)** | **[simpleIPC](./Samples/simpleIPC)** | **[simpleVoteIntrinsics](./Samples/simpleVoteIntrinsics)** |
-**[simpleVulkan](./Samples/simpleVulkan)** | **[simpleVulkanMMAP](./Samples/simpleVulkanMMAP)** | **[simpleZeroCopy](./Samples/simpleZeroCopy)** | **[streamOrderedAllocation](./Samples/streamOrderedAllocation)** |
-**[streamOrderedAllocationIPC](./Samples/streamOrderedAllocationIPC)** | **[streamOrderedAllocationP2P](./Samples/streamOrderedAllocationP2P)** | **[systemWideAtomics](./Samples/systemWideAtomics)** | **[tf32TensorCoreGemm](./Samples/tf32TensorCoreGemm)** |
-**[UnifiedMemoryPerf](./Samples/UnifiedMemoryPerf)** | **[vectorAddMMAP](./Samples/vectorAddMMAP)** | **[vectorAdd_nvrtc](./Samples/vectorAdd_nvrtc)** | **[vulkanImageCUDA](./Samples/vulkanImageCUDA)** |
-**[warpAggregatedAtomicsCG](./Samples/warpAggregatedAtomicsCG)** | **[watershedSegmentationNPP](./Samples/watershedSegmentationNPP)** |
+**[streamOrderedAllocationIPC](./Samples/streamOrderedAllocationIPC)** | **[UnifiedMemoryPerf](./Samples/UnifiedMemoryPerf)** | **[bf16TensorCoreGemm](./Samples/bf16TensorCoreGemm)** | **[cuSolverSp_LinearSolver](./Samples/cuSolverSp_LinearSolver)** |
+**[nvJPEG](./Samples/nvJPEG)** | **[watershedSegmentationNPP](./Samples/watershedSegmentationNPP)** | **[vulkanImageCUDA](./Samples/vulkanImageCUDA)** | **[boxFilterNPP](./Samples/boxFilterNPP)** |
+**[matrixMul](./Samples/matrixMul)** | **[nvJPEG_encoder](./Samples/nvJPEG_encoder)** | **[simpleCudaGraphs](./Samples/simpleCudaGraphs)** | **[simpleCUFFT](./Samples/simpleCUFFT)** |
+**[cudaTensorCoreGemm](./Samples/cudaTensorCoreGemm)** | **[cudaOpenMP](./Samples/cudaOpenMP)** | **[NV12toBGRandResize](./Samples/NV12toBGRandResize)** | **[cudaNvSci](./Samples/cudaNvSci)** |
+**[immaTensorCoreGemm](./Samples/immaTensorCoreGemm)** | **[simpleVulkan](./Samples/simpleVulkan)** | **[simpleCUBLAS](./Samples/simpleCUBLAS)** | **[jacobiCudaGraphs](./Samples/jacobiCudaGraphs)** |
+**[streamOrderedAllocationP2P](./Samples/streamOrderedAllocationP2P)** | **[dmmaTensorCoreGemm](./Samples/dmmaTensorCoreGemm)** | **[EGLStream_CUDA_Interop](./Samples/EGLStream_CUDA_Interop)** | **[vectorAddMMAP](./Samples/vectorAddMMAP)** |
+**[MersenneTwisterGP11213](./Samples/MersenneTwisterGP11213)** | **[streamOrderedAllocation](./Samples/streamOrderedAllocation)** | **[cuSolverDn_LinearSolver](./Samples/cuSolverDn_LinearSolver)** | **[cudaCompressibleMemory](./Samples/cudaCompressibleMemory)** |
+**[simpleAWBarrier](./Samples/simpleAWBarrier)** | **[simpleGL](./Samples/simpleGL)** | **[p2pBandwidthLatencyTest](./Samples/p2pBandwidthLatencyTest)** | **[conjugateGradientCudaGraphs](./Samples/conjugateGradientCudaGraphs)** |
+**[globalToShmemAsyncCopy](./Samples/globalToShmemAsyncCopy)** | **[deviceQuery](./Samples/deviceQuery)** | **[systemWideAtomics](./Samples/systemWideAtomics)** | **[matrixMulDrv](./Samples/matrixMulDrv)** |
+**[conjugateGradientMultiBlockCG](./Samples/conjugateGradientMultiBlockCG)** | **[simpleZeroCopy](./Samples/simpleZeroCopy)** | **[simpleAttributes](./Samples/simpleAttributes)** | **[batchedLabelMarkersAndLabelCompressionNPP](./Samples/batchedLabelMarkersAndLabelCompressionNPP)** |
+**[conjugateGradientMultiDeviceCG](./Samples/conjugateGradientMultiDeviceCG)** | **[simpleIPC](./Samples/simpleIPC)** | **[simpleVulkanMMAP](./Samples/simpleVulkanMMAP)** | **[cudaNvSciNvMedia](./Samples/cudaNvSciNvMedia)** |
+**[simpleVoteIntrinsics](./Samples/simpleVoteIntrinsics)** | **[simpleCUBLAS_LU](./Samples/simpleCUBLAS_LU)** | **[bandwidthTest](./Samples/bandwidthTest)** | **[cdpQuadtree](./Samples/cdpQuadtree)** |
+**[simpleCUBLASXT](./Samples/simpleCUBLASXT)** | **[tf32TensorCoreGemm](./Samples/tf32TensorCoreGemm)** | **[simpleDrvRuntime](./Samples/simpleDrvRuntime)** | **[reduction](./Samples/reduction)** |
+**[memMapIPCDrv](./Samples/memMapIPCDrv)** | **[warpAggregatedAtomicsCG](./Samples/warpAggregatedAtomicsCG)** | **[cannyEdgeDetectorNPP](./Samples/cannyEdgeDetectorNPP)** |
 
 #### Windows
-**[bandwidthTest](./Samples/bandwidthTest)** | **[batchedLabelMarkersAndLabelCompressionNPP](./Samples/batchedLabelMarkersAndLabelCompressionNPP)** | **[bf16TensorCoreGemm](./Samples/bf16TensorCoreGemm)** | **[binaryPartitionCG](./Samples/binaryPartitionCG)** |
+**[vectorAdd_nvrtc](./Samples/vectorAdd_nvrtc)** | **[shfl_scan](./Samples/shfl_scan)** | **[binaryPartitionCG](./Samples/binaryPartitionCG)** | **[concurrentKernels](./Samples/concurrentKernels)** |
 ---|---|---|---|
-**[boxFilterNPP](./Samples/boxFilterNPP)** | **[cannyEdgeDetectorNPP](./Samples/cannyEdgeDetectorNPP)** | **[concurrentKernels](./Samples/concurrentKernels)** | **[conjugateGradientCudaGraphs](./Samples/conjugateGradientCudaGraphs)** |
-**[conjugateGradientMultiBlockCG](./Samples/conjugateGradientMultiBlockCG)** | **[conjugateGradientMultiDeviceCG](./Samples/conjugateGradientMultiDeviceCG)** | **[cudaCompressibleMemory](./Samples/cudaCompressibleMemory)** | **[cudaOpenMP](./Samples/cudaOpenMP)** |
-**[cudaTensorCoreGemm](./Samples/cudaTensorCoreGemm)** | **[cuSolverDn_LinearSolver](./Samples/cuSolverDn_LinearSolver)** | **[cuSolverSp_LinearSolver](./Samples/cuSolverSp_LinearSolver)** | **[deviceQuery](./Samples/deviceQuery)** |
-**[dmmaTensorCoreGemm](./Samples/dmmaTensorCoreGemm)** | **[globalToShmemAsyncCopy](./Samples/globalToShmemAsyncCopy)** | **[immaTensorCoreGemm](./Samples/immaTensorCoreGemm)** | **[jacobiCudaGraphs](./Samples/jacobiCudaGraphs)** |
-**[matrixMul](./Samples/matrixMul)** | **[matrixMulDrv](./Samples/matrixMulDrv)** | **[memMapIPCDrv](./Samples/memMapIPCDrv)** | **[MersenneTwisterGP11213](./Samples/MersenneTwisterGP11213)** |
-**[NV12toBGRandResize](./Samples/NV12toBGRandResize)** | **[nvJPEG](./Samples/nvJPEG)** | **[nvJPEG_encoder](./Samples/nvJPEG_encoder)** | **[p2pBandwidthLatencyTest](./Samples/p2pBandwidthLatencyTest)** |
-**[reduction](./Samples/reduction)** | **[shfl_scan](./Samples/shfl_scan)** | **[simpleAttributes](./Samples/simpleAttributes)** | **[simpleAWBarrier](./Samples/simpleAWBarrier)** |
-**[simpleCUBLAS](./Samples/simpleCUBLAS)** | **[simpleCUBLASXT](./Samples/simpleCUBLASXT)** | **[simpleCUBLAS_LU](./Samples/simpleCUBLAS_LU)** | **[simpleCudaGraphs](./Samples/simpleCudaGraphs)** |
-**[simpleCUFFT](./Samples/simpleCUFFT)** | **[simpleD3D11](./Samples/simpleD3D11)** | **[simpleD3D12](./Samples/simpleD3D12)** | **[simpleDrvRuntime](./Samples/simpleDrvRuntime)** |
-**[simpleGL](./Samples/simpleGL)** | **[simpleIPC](./Samples/simpleIPC)** | **[simpleVoteIntrinsics](./Samples/simpleVoteIntrinsics)** | **[simpleVulkan](./Samples/simpleVulkan)** |
-**[simpleVulkanMMAP](./Samples/simpleVulkanMMAP)** | **[simpleZeroCopy](./Samples/simpleZeroCopy)** | **[streamOrderedAllocation](./Samples/streamOrderedAllocation)** | **[streamOrderedAllocationP2P](./Samples/streamOrderedAllocationP2P)** |
-**[tf32TensorCoreGemm](./Samples/tf32TensorCoreGemm)** | **[UnifiedMemoryPerf](./Samples/UnifiedMemoryPerf)** | **[vectorAddMMAP](./Samples/vectorAddMMAP)** | **[vectorAdd_nvrtc](./Samples/vectorAdd_nvrtc)** |
-**[vulkanImageCUDA](./Samples/vulkanImageCUDA)** | **[warpAggregatedAtomicsCG](./Samples/warpAggregatedAtomicsCG)** | **[watershedSegmentationNPP](./Samples/watershedSegmentationNPP)** |
+**[UnifiedMemoryPerf](./Samples/UnifiedMemoryPerf)** | **[bf16TensorCoreGemm](./Samples/bf16TensorCoreGemm)** | **[cuSolverSp_LinearSolver](./Samples/cuSolverSp_LinearSolver)** | **[nvJPEG](./Samples/nvJPEG)** |
+**[watershedSegmentationNPP](./Samples/watershedSegmentationNPP)** | **[vulkanImageCUDA](./Samples/vulkanImageCUDA)** | **[boxFilterNPP](./Samples/boxFilterNPP)** | **[matrixMul](./Samples/matrixMul)** |
+**[nvJPEG_encoder](./Samples/nvJPEG_encoder)** | **[simpleCudaGraphs](./Samples/simpleCudaGraphs)** | **[simpleCUFFT](./Samples/simpleCUFFT)** | **[cudaTensorCoreGemm](./Samples/cudaTensorCoreGemm)** |
+**[cudaOpenMP](./Samples/cudaOpenMP)** | **[NV12toBGRandResize](./Samples/NV12toBGRandResize)** | **[immaTensorCoreGemm](./Samples/immaTensorCoreGemm)** | **[simpleVulkan](./Samples/simpleVulkan)** |
+**[simpleCUBLAS](./Samples/simpleCUBLAS)** | **[jacobiCudaGraphs](./Samples/jacobiCudaGraphs)** | **[streamOrderedAllocationP2P](./Samples/streamOrderedAllocationP2P)** | **[dmmaTensorCoreGemm](./Samples/dmmaTensorCoreGemm)** |
+**[vectorAddMMAP](./Samples/vectorAddMMAP)** | **[MersenneTwisterGP11213](./Samples/MersenneTwisterGP11213)** | **[streamOrderedAllocation](./Samples/streamOrderedAllocation)** | **[cuSolverDn_LinearSolver](./Samples/cuSolverDn_LinearSolver)** |
+**[cudaCompressibleMemory](./Samples/cudaCompressibleMemory)** | **[simpleAWBarrier](./Samples/simpleAWBarrier)** | **[simpleGL](./Samples/simpleGL)** | **[p2pBandwidthLatencyTest](./Samples/p2pBandwidthLatencyTest)** |
+**[conjugateGradientCudaGraphs](./Samples/conjugateGradientCudaGraphs)** | **[simpleD3D11](./Samples/simpleD3D11)** | **[globalToShmemAsyncCopy](./Samples/globalToShmemAsyncCopy)** | **[deviceQuery](./Samples/deviceQuery)** |
+**[matrixMulDrv](./Samples/matrixMulDrv)** | **[conjugateGradientMultiBlockCG](./Samples/conjugateGradientMultiBlockCG)** | **[simpleZeroCopy](./Samples/simpleZeroCopy)** | **[simpleAttributes](./Samples/simpleAttributes)** |
+**[simpleD3D12](./Samples/simpleD3D12)** | **[batchedLabelMarkersAndLabelCompressionNPP](./Samples/batchedLabelMarkersAndLabelCompressionNPP)** | **[conjugateGradientMultiDeviceCG](./Samples/conjugateGradientMultiDeviceCG)** | **[simpleIPC](./Samples/simpleIPC)** |
+**[simpleVulkanMMAP](./Samples/simpleVulkanMMAP)** | **[simpleVoteIntrinsics](./Samples/simpleVoteIntrinsics)** | **[simpleCUBLAS_LU](./Samples/simpleCUBLAS_LU)** | **[bandwidthTest](./Samples/bandwidthTest)** |
+**[cdpQuadtree](./Samples/cdpQuadtree)** | **[simpleCUBLASXT](./Samples/simpleCUBLASXT)** | **[tf32TensorCoreGemm](./Samples/tf32TensorCoreGemm)** | **[simpleDrvRuntime](./Samples/simpleDrvRuntime)** |
+**[reduction](./Samples/reduction)** | **[memMapIPCDrv](./Samples/memMapIPCDrv)** | **[warpAggregatedAtomicsCG](./Samples/warpAggregatedAtomicsCG)** | **[cannyEdgeDetectorNPP](./Samples/cannyEdgeDetectorNPP)** |
 
 ## Dependencies
 
diff --git a/Samples/EGLStream_CUDA_Interop/Makefile b/Samples/EGLStream_CUDA_Interop/Makefile
index 1e901d99..010ce65c 100644
--- a/Samples/EGLStream_CUDA_Interop/Makefile
+++ b/Samples/EGLStream_CUDA_Interop/Makefile
@@ -285,12 +285,6 @@ ifeq ($(TARGET_OS),android)
   SAMPLE_ENABLED := 0
 endif
 
-# This sample is not supported on QNX
-ifeq ($(TARGET_OS),qnx)
-  $(info >>> WARNING - EGLStream_CUDA_Interop is not supported on QNX - waiving sample <<<)
-  SAMPLE_ENABLED := 0
-endif
-
 ALL_LDFLAGS :=
 ALL_LDFLAGS += $(ALL_CCFLAGS)
 ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
diff --git a/Samples/EGLStream_CUDA_Interop/README.md b/Samples/EGLStream_CUDA_Interop/README.md
index 2a0f654d..204c6e4f 100644
--- a/Samples/EGLStream_CUDA_Interop/README.md
+++ b/Samples/EGLStream_CUDA_Interop/README.md
@@ -30,7 +30,7 @@ cuDeviceGet, cuDeviceGetAttribute, cuDeviceComputeCapability, cuDeviceGetCount,
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.3](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.4](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/MersenneTwisterGP11213/Makefile b/Samples/MersenneTwisterGP11213/Makefile
index fb3aa590..e40b5b99 100644
--- a/Samples/MersenneTwisterGP11213/Makefile
+++ b/Samples/MersenneTwisterGP11213/Makefile
@@ -263,14 +263,6 @@ ALL_CCFLAGS += $(EXTRA_NVCCFLAGS)
 ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS))
 ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS))
 
-SAMPLE_ENABLED := 1
-
-# This sample is not supported on QNX
-ifeq ($(TARGET_OS),qnx)
-  $(info >>> WARNING - MersenneTwisterGP11213 is not supported on QNX - waiving sample <<<)
-  SAMPLE_ENABLED := 0
-endif
-
 ALL_LDFLAGS :=
 ALL_LDFLAGS += $(ALL_CCFLAGS)
 ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
@@ -305,10 +297,6 @@ ALL_CCFLAGS += --threads 0
 
 LIBRARIES += -lcurand_static -lculibos
 
-ifeq ($(SAMPLE_ENABLED),0)
-EXEC ?= @echo "[@]"
-endif
-
 ################################################################################
 
 # Target rules
@@ -316,23 +304,16 @@ all: build
 
 build: MersenneTwisterGP11213
 
-check.deps:
-ifeq ($(SAMPLE_ENABLED),0)
-	@echo "Sample will be waived due to the above missing dependencies"
-else
-	@echo "Sample is ready - all dependencies have been met"
-endif
-
 MersenneTwister.o:MersenneTwister.cpp
-	$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<
+	$(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<
 
 MersenneTwisterGP11213: MersenneTwister.o
-	$(EXEC) $(NVCC) $(ALL_LDFLAGS) $(GENCODE_FLAGS) -o $@ $+ $(LIBRARIES)
-	$(EXEC) mkdir -p ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
-	$(EXEC) cp $@ ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
+	$(NVCC) $(ALL_LDFLAGS) $(GENCODE_FLAGS) -o $@ $+ $(LIBRARIES)
+	mkdir -p ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
+	cp $@ ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
 
 run: build
-	$(EXEC) ./MersenneTwisterGP11213
+	./MersenneTwisterGP11213
 
 clean:
 	rm -f MersenneTwisterGP11213 MersenneTwister.o
diff --git a/Samples/MersenneTwisterGP11213/MersenneTwisterGP11213_vs2017.vcxproj b/Samples/MersenneTwisterGP11213/MersenneTwisterGP11213_vs2017.vcxproj
index e39c60a4..07a7c276 100644
--- a/Samples/MersenneTwisterGP11213/MersenneTwisterGP11213_vs2017.vcxproj
+++ b/Samples/MersenneTwisterGP11213/MersenneTwisterGP11213_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/MersenneTwisterGP11213/MersenneTwisterGP11213_vs2019.vcxproj b/Samples/MersenneTwisterGP11213/MersenneTwisterGP11213_vs2019.vcxproj
index 8648205f..ffe973b8 100644
--- a/Samples/MersenneTwisterGP11213/MersenneTwisterGP11213_vs2019.vcxproj
+++ b/Samples/MersenneTwisterGP11213/MersenneTwisterGP11213_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/MersenneTwisterGP11213/README.md b/Samples/MersenneTwisterGP11213/README.md
index eb8bd797..24a482d8 100644
--- a/Samples/MersenneTwisterGP11213/README.md
+++ b/Samples/MersenneTwisterGP11213/README.md
@@ -27,7 +27,7 @@ x86_64, ppc64le, armv7l
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.3](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.4](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/NV12toBGRandResize/NV12toBGRandResize_vs2017.vcxproj b/Samples/NV12toBGRandResize/NV12toBGRandResize_vs2017.vcxproj
index 3bbad98a..18faacc5 100644
--- a/Samples/NV12toBGRandResize/NV12toBGRandResize_vs2017.vcxproj
+++ b/Samples/NV12toBGRandResize/NV12toBGRandResize_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -113,6 +113,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/NV12toBGRandResize/NV12toBGRandResize_vs2019.vcxproj b/Samples/NV12toBGRandResize/NV12toBGRandResize_vs2019.vcxproj
index a5149390..78191a45 100644
--- a/Samples/NV12toBGRandResize/NV12toBGRandResize_vs2019.vcxproj
+++ b/Samples/NV12toBGRandResize/NV12toBGRandResize_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -109,6 +109,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/NV12toBGRandResize/README.md b/Samples/NV12toBGRandResize/README.md
index 8070aaf0..28d44a1b 100644
--- a/Samples/NV12toBGRandResize/README.md
+++ b/Samples/NV12toBGRandResize/README.md
@@ -27,7 +27,7 @@ cudaMemcpy2D, cudaMallocManaged
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.3](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.4](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/UnifiedMemoryPerf/README.md b/Samples/UnifiedMemoryPerf/README.md
index 43cecf11..3a054229 100644
--- a/Samples/UnifiedMemoryPerf/README.md
+++ b/Samples/UnifiedMemoryPerf/README.md
@@ -28,7 +28,7 @@ cudaMallocManaged, cudaStreamAttachMemAsync, cudaMemcpyAsync, cudaMallocHost, cu
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.3](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.4](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/UnifiedMemoryPerf/UnifiedMemoryPerf_vs2017.vcxproj b/Samples/UnifiedMemoryPerf/UnifiedMemoryPerf_vs2017.vcxproj
index b767c25f..a8e56e5f 100644
--- a/Samples/UnifiedMemoryPerf/UnifiedMemoryPerf_vs2017.vcxproj
+++ b/Samples/UnifiedMemoryPerf/UnifiedMemoryPerf_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -111,6 +111,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/UnifiedMemoryPerf/UnifiedMemoryPerf_vs2019.vcxproj b/Samples/UnifiedMemoryPerf/UnifiedMemoryPerf_vs2019.vcxproj
index cfcb126c..1e740893 100644
--- a/Samples/UnifiedMemoryPerf/UnifiedMemoryPerf_vs2019.vcxproj
+++ b/Samples/UnifiedMemoryPerf/UnifiedMemoryPerf_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -107,6 +107,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/bandwidthTest/README.md b/Samples/bandwidthTest/README.md
index 8f70b9c0..d5546001 100644
--- a/Samples/bandwidthTest/README.md
+++ b/Samples/bandwidthTest/README.md
@@ -27,7 +27,7 @@ cudaSetDevice, cudaHostAlloc, cudaFree, cudaMallocHost, cudaFreeHost, cudaMemcpy
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.3](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.4](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/bandwidthTest/bandwidthTest_vs2017.vcxproj b/Samples/bandwidthTest/bandwidthTest_vs2017.vcxproj
index c6979275..22944af3 100644
--- a/Samples/bandwidthTest/bandwidthTest_vs2017.vcxproj
+++ b/Samples/bandwidthTest/bandwidthTest_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/bandwidthTest/bandwidthTest_vs2019.vcxproj b/Samples/bandwidthTest/bandwidthTest_vs2019.vcxproj
index 40850f7e..786e0a3b 100644
--- a/Samples/bandwidthTest/bandwidthTest_vs2019.vcxproj
+++ b/Samples/bandwidthTest/bandwidthTest_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/batchedLabelMarkersAndLabelCompressionNPP/Makefile b/Samples/batchedLabelMarkersAndLabelCompressionNPP/Makefile
index fccab0a1..00ee41fa 100644
--- a/Samples/batchedLabelMarkersAndLabelCompressionNPP/Makefile
+++ b/Samples/batchedLabelMarkersAndLabelCompressionNPP/Makefile
@@ -271,12 +271,6 @@ ifeq ($(TARGET_OS),darwin)
   SAMPLE_ENABLED := 0
 endif
 
-# This sample is not supported on QNX
-ifeq ($(TARGET_OS),qnx)
-  $(info >>> WARNING - batchedLabelMarkersAndLabelCompressionNPP is not supported on QNX - waiving sample <<<)
-  SAMPLE_ENABLED := 0
-endif
-
 ALL_LDFLAGS :=
 ALL_LDFLAGS += $(ALL_CCFLAGS)
 ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
diff --git a/Samples/batchedLabelMarkersAndLabelCompressionNPP/README.md b/Samples/batchedLabelMarkersAndLabelCompressionNPP/README.md
index 16270de7..240c8efe 100644
--- a/Samples/batchedLabelMarkersAndLabelCompressionNPP/README.md
+++ b/Samples/batchedLabelMarkersAndLabelCompressionNPP/README.md
@@ -28,7 +28,7 @@ x86_64, ppc64le, armv7l
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.3](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.4](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/batchedLabelMarkersAndLabelCompressionNPP/batchedLabelMarkersAndLabelCompressionNPP_vs2017.vcxproj b/Samples/batchedLabelMarkersAndLabelCompressionNPP/batchedLabelMarkersAndLabelCompressionNPP_vs2017.vcxproj
index 0f34cd13..aa91fca2 100644
--- a/Samples/batchedLabelMarkersAndLabelCompressionNPP/batchedLabelMarkersAndLabelCompressionNPP_vs2017.vcxproj
+++ b/Samples/batchedLabelMarkersAndLabelCompressionNPP/batchedLabelMarkersAndLabelCompressionNPP_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/batchedLabelMarkersAndLabelCompressionNPP/batchedLabelMarkersAndLabelCompressionNPP_vs2019.vcxproj b/Samples/batchedLabelMarkersAndLabelCompressionNPP/batchedLabelMarkersAndLabelCompressionNPP_vs2019.vcxproj
index 22205f6b..74ec541d 100644
--- a/Samples/batchedLabelMarkersAndLabelCompressionNPP/batchedLabelMarkersAndLabelCompressionNPP_vs2019.vcxproj
+++ b/Samples/batchedLabelMarkersAndLabelCompressionNPP/batchedLabelMarkersAndLabelCompressionNPP_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/bf16TensorCoreGemm/README.md b/Samples/bf16TensorCoreGemm/README.md
index 5a51bb4e..8910ac7f 100644
--- a/Samples/bf16TensorCoreGemm/README.md
+++ b/Samples/bf16TensorCoreGemm/README.md
@@ -27,7 +27,7 @@ cudaMallocManaged, cudaDeviceSynchronize, cudaFuncSetAttribute, cudaEventCreate,
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.3](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.4](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/bf16TensorCoreGemm/bf16TensorCoreGemm_vs2017.vcxproj b/Samples/bf16TensorCoreGemm/bf16TensorCoreGemm_vs2017.vcxproj
index 156376ad..e814745b 100644
--- a/Samples/bf16TensorCoreGemm/bf16TensorCoreGemm_vs2017.vcxproj
+++ b/Samples/bf16TensorCoreGemm/bf16TensorCoreGemm_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/bf16TensorCoreGemm/bf16TensorCoreGemm_vs2019.vcxproj b/Samples/bf16TensorCoreGemm/bf16TensorCoreGemm_vs2019.vcxproj
index 1146105a..fc0b68f7 100644
--- a/Samples/bf16TensorCoreGemm/bf16TensorCoreGemm_vs2019.vcxproj
+++ b/Samples/bf16TensorCoreGemm/bf16TensorCoreGemm_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/binaryPartitionCG/README.md b/Samples/binaryPartitionCG/README.md
index 98c3418d..f7d335fd 100644
--- a/Samples/binaryPartitionCG/README.md
+++ b/Samples/binaryPartitionCG/README.md
@@ -24,7 +24,7 @@ x86_64, ppc64le, armv7l
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.3](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.4](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/binaryPartitionCG/binaryPartitionCG_vs2017.vcxproj b/Samples/binaryPartitionCG/binaryPartitionCG_vs2017.vcxproj
index 2399c9ec..51aa64df 100644
--- a/Samples/binaryPartitionCG/binaryPartitionCG_vs2017.vcxproj
+++ b/Samples/binaryPartitionCG/binaryPartitionCG_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/binaryPartitionCG/binaryPartitionCG_vs2019.vcxproj b/Samples/binaryPartitionCG/binaryPartitionCG_vs2019.vcxproj
index fe7bb11f..bdee4499 100644
--- a/Samples/binaryPartitionCG/binaryPartitionCG_vs2019.vcxproj
+++ b/Samples/binaryPartitionCG/binaryPartitionCG_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/boxFilterNPP/README.md b/Samples/boxFilterNPP/README.md
index 54f26d6a..4824e0e4 100644
--- a/Samples/boxFilterNPP/README.md
+++ b/Samples/boxFilterNPP/README.md
@@ -27,7 +27,7 @@ x86_64, ppc64le, armv7l
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.3](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.4](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/boxFilterNPP/boxFilterNPP_vs2017.vcxproj b/Samples/boxFilterNPP/boxFilterNPP_vs2017.vcxproj
index 580c3df5..8772ff10 100644
--- a/Samples/boxFilterNPP/boxFilterNPP_vs2017.vcxproj
+++ b/Samples/boxFilterNPP/boxFilterNPP_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -118,6 +118,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/boxFilterNPP/boxFilterNPP_vs2019.vcxproj b/Samples/boxFilterNPP/boxFilterNPP_vs2019.vcxproj
index 91f4db2d..6499c47d 100644
--- a/Samples/boxFilterNPP/boxFilterNPP_vs2019.vcxproj
+++ b/Samples/boxFilterNPP/boxFilterNPP_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -114,6 +114,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/cannyEdgeDetectorNPP/README.md b/Samples/cannyEdgeDetectorNPP/README.md
index 0c969c8e..7e88a000 100644
--- a/Samples/cannyEdgeDetectorNPP/README.md
+++ b/Samples/cannyEdgeDetectorNPP/README.md
@@ -27,7 +27,7 @@ x86_64, ppc64le, armv7l
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.3](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.4](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/cannyEdgeDetectorNPP/cannyEdgeDetectorNPP_vs2017.vcxproj b/Samples/cannyEdgeDetectorNPP/cannyEdgeDetectorNPP_vs2017.vcxproj
index f0140b6a..f4166ea6 100644
--- a/Samples/cannyEdgeDetectorNPP/cannyEdgeDetectorNPP_vs2017.vcxproj
+++ b/Samples/cannyEdgeDetectorNPP/cannyEdgeDetectorNPP_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -118,6 +118,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/cannyEdgeDetectorNPP/cannyEdgeDetectorNPP_vs2019.vcxproj b/Samples/cannyEdgeDetectorNPP/cannyEdgeDetectorNPP_vs2019.vcxproj
index f919b081..6f6c4407 100644
--- a/Samples/cannyEdgeDetectorNPP/cannyEdgeDetectorNPP_vs2019.vcxproj
+++ b/Samples/cannyEdgeDetectorNPP/cannyEdgeDetectorNPP_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -114,6 +114,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/cdpQuadtree/Makefile b/Samples/cdpQuadtree/Makefile
new file mode 100644
index 00000000..56cec011
--- /dev/null
+++ b/Samples/cdpQuadtree/Makefile
@@ -0,0 +1,370 @@
+################################################################################
+# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+################################################################################
+#
+# Makefile project only supported on Mac OS X and Linux platforms
+#
+################################################################################
+
+# Location of the CUDA Toolkit
+CUDA_PATH ?= /usr/local/cuda
+
+##############################
+# start deprecated interface #
+##############################
+ifeq ($(x86_64),1)
+    $(info WARNING - x86_64 variable has been deprecated)
+    $(info WARNING - please use TARGET_ARCH=x86_64 instead)
+    TARGET_ARCH ?= x86_64
+endif
+ifeq ($(ARMv7),1)
+    $(info WARNING - ARMv7 variable has been deprecated)
+    $(info WARNING - please use TARGET_ARCH=armv7l instead)
+    TARGET_ARCH ?= armv7l
+endif
+ifeq ($(aarch64),1)
+    $(info WARNING - aarch64 variable has been deprecated)
+    $(info WARNING - please use TARGET_ARCH=aarch64 instead)
+    TARGET_ARCH ?= aarch64
+endif
+ifeq ($(ppc64le),1)
+    $(info WARNING - ppc64le variable has been deprecated)
+    $(info WARNING - please use TARGET_ARCH=ppc64le instead)
+    TARGET_ARCH ?= ppc64le
+endif
+ifneq ($(GCC),)
+    $(info WARNING - GCC variable has been deprecated)
+    $(info WARNING - please use HOST_COMPILER=$(GCC) instead)
+    HOST_COMPILER ?= $(GCC)
+endif
+ifneq ($(abi),)
+    $(error ERROR - abi variable has been removed)
+endif
+############################
+# end deprecated interface #
+############################
+
+# architecture
+HOST_ARCH   := $(shell uname -m)
+TARGET_ARCH ?= $(HOST_ARCH)
+ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le armv7l))
+    ifneq ($(TARGET_ARCH),$(HOST_ARCH))
+        ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le))
+            TARGET_SIZE := 64
+        else ifneq (,$(filter $(TARGET_ARCH),armv7l))
+            TARGET_SIZE := 32
+        endif
+    else
+        TARGET_SIZE := $(shell getconf LONG_BIT)
+    endif
+else
+    $(error ERROR - unsupported value $(TARGET_ARCH) for TARGET_ARCH!)
+endif
+
+# sbsa and aarch64 systems look similar. Need to differentiate them at host level for now.
+ifeq ($(HOST_ARCH),aarch64)
+    ifeq ($(CUDA_PATH)/targets/sbsa-linux,$(shell ls -1d $(CUDA_PATH)/targets/sbsa-linux 2>/dev/null))
+        HOST_ARCH := sbsa
+        TARGET_ARCH := sbsa
+    endif
+endif
+
+ifneq ($(TARGET_ARCH),$(HOST_ARCH))
+    ifeq (,$(filter $(HOST_ARCH)-$(TARGET_ARCH),aarch64-armv7l x86_64-armv7l x86_64-aarch64 x86_64-sbsa x86_64-ppc64le))
+        $(error ERROR - cross compiling from $(HOST_ARCH) to $(TARGET_ARCH) is not supported!)
+    endif
+endif
+
+# On a native aarch64 system with a 32-bit userspace, change TARGET_ARCH to armv7l
+ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_SIZE),aarch64-aarch64-32)
+    TARGET_ARCH = armv7l
+endif
+
+# operating system
+HOST_OS   := $(shell uname -s 2>/dev/null | tr "[:upper:]" "[:lower:]")
+TARGET_OS ?= $(HOST_OS)
+ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
+    $(error ERROR - unsupported value $(TARGET_OS) for TARGET_OS!)
+endif
+
+# host compiler
+ifeq ($(TARGET_OS),darwin)
+    ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
+        HOST_COMPILER ?= clang++
+    endif
+else ifneq ($(TARGET_ARCH),$(HOST_ARCH))
+    ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
+        ifeq ($(TARGET_OS),linux)
+            HOST_COMPILER ?= arm-linux-gnueabihf-g++
+        else ifeq ($(TARGET_OS),qnx)
+            ifeq ($(QNX_HOST),)
+                $(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
+            endif
+            ifeq ($(QNX_TARGET),)
+                $(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
+            endif
+            export QNX_HOST
+            export QNX_TARGET
+            HOST_COMPILER ?= $(QNX_HOST)/usr/bin/arm-unknown-nto-qnx6.6.0eabi-g++
+        else ifeq ($(TARGET_OS),android)
+            HOST_COMPILER ?= arm-linux-androideabi-g++
+        endif
+    else ifeq ($(TARGET_ARCH),aarch64)
+        ifeq ($(TARGET_OS), linux)
+            HOST_COMPILER ?= aarch64-linux-gnu-g++
+        else ifeq ($(TARGET_OS),qnx)
+            ifeq ($(QNX_HOST),)
+                $(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
+            endif
+            ifeq ($(QNX_TARGET),)
+                $(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
+            endif
+            export QNX_HOST
+            export QNX_TARGET
+            HOST_COMPILER ?= $(QNX_HOST)/usr/bin/q++
+        else ifeq ($(TARGET_OS), android)
+            HOST_COMPILER ?= aarch64-linux-android-clang++
+        endif
+    else ifeq ($(TARGET_ARCH),sbsa)
+        HOST_COMPILER ?= aarch64-linux-gnu-g++
+    else ifeq ($(TARGET_ARCH),ppc64le)
+        HOST_COMPILER ?= powerpc64le-linux-gnu-g++
+    endif
+endif
+HOST_COMPILER ?= g++
+NVCC          := $(CUDA_PATH)/bin/nvcc -ccbin $(HOST_COMPILER)
+
+# Internal flags: NVCCFLAGS go to nvcc itself; CCFLAGS and LDFLAGS are forwarded below via -Xcompiler / -Xlinker
+NVCCFLAGS   := -m${TARGET_SIZE}
+CCFLAGS     :=
+LDFLAGS     :=
+
+# Build flags that depend on the target OS or on the host/target combination
+ifeq ($(TARGET_OS),darwin)
+    LDFLAGS += -rpath $(CUDA_PATH)/lib
+    CCFLAGS += -arch $(HOST_ARCH)
+else ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-armv7l-linux)
+    LDFLAGS += --dynamic-linker=/lib/ld-linux-armhf.so.3
+    CCFLAGS += -mfloat-abi=hard
+else ifeq ($(TARGET_OS),android)
+    LDFLAGS += -pie
+    CCFLAGS += -fpie -fpic -fexceptions
+endif
+# Cross-compilation only (TARGET_ARCH != HOST_ARCH): per-target flags; TARGET_FS, when set, is used as the sysroot
+ifneq ($(TARGET_ARCH),$(HOST_ARCH))
+    ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
+        ifneq ($(TARGET_FS),)
+            GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
+            ifeq ($(GCCVERSIONLTEQ46),1)
+                CCFLAGS += --sysroot=$(TARGET_FS)
+            endif
+            LDFLAGS += --sysroot=$(TARGET_FS)
+            LDFLAGS += -rpath-link=$(TARGET_FS)/lib
+            LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib
+            LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/arm-linux-gnueabihf
+        endif
+    endif
+    ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
+        ifneq ($(TARGET_FS),)
+            GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
+            ifeq ($(GCCVERSIONLTEQ46),1)
+                CCFLAGS += --sysroot=$(TARGET_FS)
+            endif
+            LDFLAGS += --sysroot=$(TARGET_FS)
+            LDFLAGS += -rpath-link=$(TARGET_FS)/lib -L$(TARGET_FS)/lib
+            LDFLAGS += -rpath-link=$(TARGET_FS)/lib/aarch64-linux-gnu -L$(TARGET_FS)/lib/aarch64-linux-gnu
+            LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib -L$(TARGET_FS)/usr/lib
+            LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/aarch64-linux-gnu -L$(TARGET_FS)/usr/lib/aarch64-linux-gnu
+            LDFLAGS += --unresolved-symbols=ignore-in-shared-libs
+            CCFLAGS += -isystem=$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include/libdrm
+            CCFLAGS += -isystem=$(TARGET_FS)/usr/include/aarch64-linux-gnu -I$(TARGET_FS)/usr/include/aarch64-linux-gnu
+        endif
+    endif
+    ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
+        NVCCFLAGS += --qpp-config 5.4.0,gcc_ntoaarch64le
+        CCFLAGS += -DWIN_INTERFACE_CUSTOM -I/usr/include/aarch64-qnx-gnu
+        LDFLAGS += -lsocket
+        LDFLAGS += -L/usr/lib/aarch64-qnx-gnu
+        CCFLAGS += "-Wl\,-rpath-link\,/usr/lib/aarch64-qnx-gnu"
+        ifdef TARGET_OVERRIDE
+            LDFLAGS += -lslog2
+        endif
+        # Optional target filesystem: add its library and include directories
+        ifneq ($(TARGET_FS),)
+            LDFLAGS += -L$(TARGET_FS)/usr/lib
+            CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/lib"
+            LDFLAGS += -L$(TARGET_FS)/usr/libnvidia
+            CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/libnvidia"
+            CCFLAGS += -I$(TARGET_FS)/../include
+        endif
+    endif
+endif
+
+ifdef TARGET_OVERRIDE # cuda toolkit targets override
+    NVCCFLAGS += -target-dir $(TARGET_OVERRIDE)
+endif
+
+# Install sub-directory for the target architecture/OS (left empty when no branch below matches)
+CUDA_INSTALL_TARGET_DIR :=
+ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
+    CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-gnueabihf/
+else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
+    CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux/
+else ifeq ($(TARGET_ARCH)-$(TARGET_OS),sbsa-linux)
+    CUDA_INSTALL_TARGET_DIR = targets/sbsa-linux/
+else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-android)
+    CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-androideabi/
+else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-android)
+    CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux-androideabi/
+else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-qnx)
+    CUDA_INSTALL_TARGET_DIR = targets/ARMv7-linux-QNX/
+else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
+    CUDA_INSTALL_TARGET_DIR = targets/aarch64-qnx/
+else ifeq ($(TARGET_ARCH),ppc64le)
+    CUDA_INSTALL_TARGET_DIR = targets/ppc64le-linux/
+endif
+
+# Debug build (make dbg=1) adds -g -G to the nvcc flags; BUILD_TYPE names the output sub-directory under ../../bin
+ifeq ($(dbg),1)
+      NVCCFLAGS += -g -G
+      BUILD_TYPE := debug
+else
+      BUILD_TYPE := release
+endif
+# Compile flags: nvcc flags first, then every host-compiler flag prefixed with -Xcompiler
+ALL_CCFLAGS :=
+ALL_CCFLAGS += $(NVCCFLAGS)
+ALL_CCFLAGS += $(EXTRA_NVCCFLAGS)
+ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS))
+ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS))
+# SAMPLE_ENABLED is cleared whenever a requirement is not met; the recipes then only echo their commands
+SAMPLE_ENABLED := 1
+
+# This sample is not supported on QNX
+ifeq ($(TARGET_OS),qnx)
+  $(info >>> WARNING - cdpQuadtree is not supported on QNX - waiving sample <<<)
+  SAMPLE_ENABLED := 0
+endif
+# Link flags are captured here with ":=", so options appended to ALL_CCFLAGS further down (e.g. -dc) do not reach the link step
+ALL_LDFLAGS :=
+ALL_LDFLAGS += $(ALL_CCFLAGS)
+ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
+ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS))
+
+# Common includes and paths for CUDA
+INCLUDES  := -I../../Common
+LIBRARIES :=
+
+################################################################################
+
+# Detect whether the host compiler is recent enough (GCC >= 5.0.0) for the C++14 support this sample requires
+ifeq ($(TARGET_OS),linux)
+    empty :=
+    space := $(empty) $(empty)
+    GCCVERSIONSTRING := $(shell expr `$(HOST_COMPILER) -dumpversion`)
+# Build a numeric version by concatenating the first three "."-separated fields of the version string
+    GCCVERSION := $(shell expr `echo $(GCCVERSIONSTRING)` | cut -f1 -d.)
+    GCCVERSION += $(shell expr `echo $(GCCVERSIONSTRING)` | cut -f2 -d.)
+    GCCVERSION += $(shell expr `echo $(GCCVERSIONSTRING)` | cut -f3 -d.)
+# Append "00" as padding so that short version strings still give enough digits for the comparison below
+    GCCVERSION += 00
+# Remove spaces from the version number
+    GCCVERSION := $(subst $(space),$(empty),$(GCCVERSION))
+#$(warning $(GCCVERSION))
+# expr prints 1 when the comparison holds and 0 otherwise
+    IS_MIN_VERSION := $(shell expr `echo $(GCCVERSION)` \>= 50000)
+
+    ifeq ($(IS_MIN_VERSION), 1)
+        $(info >>> GCC Version is greater or equal to 5.0.0 <<<)
+    else
+        $(info >>> Waiving build. Minimum GCC version required is 5.0.0<<<)
+        SAMPLE_ENABLED := 0
+    endif
+endif
+
+# Gencode arguments: default SM list (72 is only included for armv7l/aarch64 targets); override with SMS="..."
+ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64))
+SMS ?= 35 37 50 52 60 61 70 72 75 80 86
+else
+SMS ?= 35 37 50 52 60 61 70 75 80 86
+endif
+
+ifeq ($(SMS),)
+$(info >>> WARNING - no SM architectures have been specified - waiving sample <<<)
+SAMPLE_ENABLED := 0
+endif
+# GENCODE_FLAGS can be preset by the caller; it is only derived from SMS when it is empty
+ifeq ($(GENCODE_FLAGS),)
+# Generate SASS code for each SM architecture listed in $(SMS)
+$(foreach sm,$(SMS),$(eval GENCODE_FLAGS += -gencode arch=compute_$(sm),code=sm_$(sm)))
+
+# Generate PTX code from the highest SM architecture in $(SMS) to guarantee forward-compatibility (the sort is lexical, so this assumes two-digit SM numbers)
+HIGHEST_SM := $(lastword $(sort $(SMS)))
+ifneq ($(HIGHEST_SM),)
+GENCODE_FLAGS += -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM)
+endif
+endif
+# Sample-specific compile flags; -dc produces relocatable device code and is added after ALL_LDFLAGS was captured
+ALL_CCFLAGS += -dc --std=c++14 --threads 0
+# CUDA device runtime library, needed because the kernel launches kernels from device code
+LIBRARIES += -lcudadevrt
+# When the sample is waived, EXEC turns every recipe command into an echo, so nothing is executed
+ifeq ($(SAMPLE_ENABLED),0)
+EXEC ?= @echo "[@]"
+endif
+
+################################################################################
+
+# Target rules. The names listed in .PHONY are commands, not files to be built.
+.PHONY: all build check.deps run clean clobber
+all: build
+build: cdpQuadtree
+
+check.deps:
+ifeq ($(SAMPLE_ENABLED),0)
+	@echo "Sample will be waived due to the above missing dependencies"
+else
+	@echo "Sample is ready - all dependencies have been met"
+endif
+# Compile step: ALL_CCFLAGS carries -dc, so the object holds relocatable device code
+cdpQuadtree.o:cdpQuadtree.cu
+	$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<
+# Link step, followed by a copy of the executable into the shared bin directory
+cdpQuadtree: cdpQuadtree.o
+	$(EXEC) $(NVCC) $(ALL_LDFLAGS) $(GENCODE_FLAGS) -o $@ $+ $(LIBRARIES)
+	$(EXEC) mkdir -p ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
+	$(EXEC) cp $@ ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
+
+run: build
+	$(EXEC) ./cdpQuadtree
+
+clean:
+	rm -f cdpQuadtree cdpQuadtree.o
+	rm -rf ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)/cdpQuadtree
+
+clobber: clean
diff --git a/Samples/cdpQuadtree/NsightEclipse.xml b/Samples/cdpQuadtree/NsightEclipse.xml
new file mode 100644
index 00000000..0dd9a41c
--- /dev/null
+++ b/Samples/cdpQuadtree/NsightEclipse.xml
@@ -0,0 +1,72 @@
+<?xml version="1.0" encoding="UTF-8"?> 
+<!DOCTYPE entry SYSTEM "SamplesInfo.dtd">
+<entry>
+  <name>cdpQuadtree</name>
+  <cflags>
+    <flag>-dc</flag>
+    <flag>--std=c++14</flag>
+  </cflags>
+  <description><![CDATA[This sample demonstrates Quad Trees implemented using CUDA Dynamic Parallelism. This sample requires devices with compute capability 3.5 or higher.]]></description>
+  <includepaths>
+    <path>./</path>
+    <path>../</path>
+    <path>../../common/inc</path>
+  </includepaths>
+  <keyconcepts>
+    <concept level="basic">Cooperative Groups</concept>
+    <concept level="advanced">CUDA Dynamic Parallelism</concept>
+  </keyconcepts>
+  <keywords>
+    <keyword>GPGPU</keyword>
+    <keyword>CPP14</keyword>
+  </keywords>
+  <libraries>
+    <library>cudadevrt</library>
+  </libraries>
+  <librarypaths>
+  </librarypaths>
+  <nsight_eclipse>true</nsight_eclipse>
+  <primary_file>cdpQuadtree.cu</primary_file>
+  <required_dependencies>
+    <dependency>CDP</dependency>
+  </required_dependencies>
+  <scopes>
+    <scope>1:CUDA Advanced Topics</scope>
+  </scopes>
+  <sm-arch>sm35</sm-arch>
+  <sm-arch>sm37</sm-arch>
+  <sm-arch>sm50</sm-arch>
+  <sm-arch>sm52</sm-arch>
+  <sm-arch>sm60</sm-arch>
+  <sm-arch>sm61</sm-arch>
+  <sm-arch>sm70</sm-arch>
+  <sm-arch>sm72</sm-arch>
+  <sm-arch>sm75</sm-arch>
+  <sm-arch>sm80</sm-arch>
+  <sm-arch>sm86</sm-arch>
+  <supported_envs>
+    <env>
+      <arch>x86_64</arch>
+      <platform>linux</platform>
+    </env>
+    <env>
+      <platform>windows7</platform>
+    </env>
+    <env>
+      <arch>x86_64</arch>
+      <platform>macosx</platform>
+    </env>
+    <env>
+      <arch>arm</arch>
+    </env>
+    <env>
+      <arch>ppc64le</arch>
+      <platform>linux</platform>
+    </env>
+  </supported_envs>
+  <supported_sm_architectures>
+    <from>3.5</from>
+  </supported_sm_architectures>
+  <title>Quad Tree (CUDA Dynamic Parallelism)</title>
+  <type>exe</type>
+</entry>
diff --git a/Samples/cdpQuadtree/README.md b/Samples/cdpQuadtree/README.md
new file mode 100644
index 00000000..80054911
--- /dev/null
+++ b/Samples/cdpQuadtree/README.md
@@ -0,0 +1,71 @@
+# cdpQuadtree - Quad Tree (CUDA Dynamic Parallelism)
+
+## Description
+
+This sample demonstrates Quad Trees implemented using CUDA Dynamic Parallelism. This sample requires devices with compute capability 3.5 or higher.
+
+## Key Concepts
+
+Cooperative Groups, CUDA Dynamic Parallelism
+
+## Supported SM Architectures
+
+[SM 3.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 3.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)
+
+## Supported OSes
+
+Linux, Windows
+
+## Supported CPU Architecture
+
+x86_64, ppc64le, armv7l
+
+## CUDA APIs involved
+
+## Dependencies needed to build/run
+[CDP](../../README.md#cdp)
+
+## Prerequisites
+
+Download and install the [CUDA Toolkit 11.4](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Make sure the dependencies mentioned in the [Dependencies](#dependencies-needed-to-buildrun) section above are installed.
+
+## Build and Run
+
+### Windows
+The Windows samples are built using the Visual Studio IDE. Solution files (.sln) are provided for each supported version of Visual Studio, using the format:
+```
+*_vs<version>.sln - for Visual Studio <version>
+```
+Each individual sample has its own set of solution files in its directory.
+
+To build/examine all the samples at once, the complete solution files should be used. To build/examine a single sample, the individual sample solution files should be used.
+> **Note:** Some samples require that the Microsoft DirectX SDK (June 2010 or newer) be installed and that the VC++ directory paths are properly set up (**Tools > Options...**). Check the DirectX Dependencies section for details.
+
+### Linux
+The Linux samples are built using makefiles. To use the makefiles, change the current directory to the sample directory you wish to build, and run make:
+```
+$ cd <sample_dir>
+$ make
+```
+The samples makefiles can take advantage of certain options:
+*  **TARGET_ARCH=<arch>** - cross-compile targeting a specific architecture. Allowed architectures are x86_64, ppc64le, armv7l.
+    By default, TARGET_ARCH is set to HOST_ARCH. On an x86_64 machine, not setting TARGET_ARCH is the equivalent of setting TARGET_ARCH=x86_64.<br/>
+`$ make TARGET_ARCH=x86_64` <br/> `$ make TARGET_ARCH=ppc64le` <br/> `$ make TARGET_ARCH=armv7l` <br/>
+    See [here](http://docs.nvidia.com/cuda/cuda-samples/index.html#cross-samples) for more details.
+*   **dbg=1** - build with debug symbols
+    ```
+    $ make dbg=1
+    ```
+*   **SMS="A B ..."** - override the SM architectures for which the sample will be built, where `"A B ..."` is a space-delimited list of SM architectures. For example, to generate SASS for SM 50 and SM 60, use `SMS="50 60"`.
+    ```
+    $ make SMS="50 60"
+    ```
+
+*  **HOST_COMPILER=<host_compiler>** - override the default g++ host compiler. See the [Linux Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#system-requirements) for a list of supported host compilers.
+```
+    $ make HOST_COMPILER=g++
+```
+
+## References (for more details)
+
diff --git a/Samples/cdpQuadtree/cdpQuadtree.cu b/Samples/cdpQuadtree/cdpQuadtree.cu
new file mode 100644
index 00000000..256b1b8f
--- /dev/null
+++ b/Samples/cdpQuadtree/cdpQuadtree.cu
@@ -0,0 +1,742 @@
+/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of NVIDIA CORPORATION nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <thrust/random.h>
+#include <thrust/device_vector.h>
+#include <thrust/host_vector.h>
+#include <cooperative_groups.h>
+
+namespace cg = cooperative_groups;
+#include <helper_cuda.h>
+
+////////////////////////////////////////////////////////////////////////////////
+// A structure of 2D points (structure of arrays).
+////////////////////////////////////////////////////////////////////////////////
+class Points {
+  float *m_x;  // x coordinates, indexed by point
+  float *m_y;  // y coordinates, indexed by point
+
+ public:
+  // Default state: no coordinate arrays attached.
+  __host__ __device__ Points() : m_x(nullptr), m_y(nullptr) {}
+
+  // Attach to the coordinate arrays x and y (only the pointers are stored).
+  __host__ __device__ Points(float *x, float *y) : m_x(x), m_y(y) {}
+
+  // Return the point stored at index idx.
+  __host__ __device__ __forceinline__ float2 get_point(int idx) const {
+    const float px = m_x[idx];
+    const float py = m_y[idx];
+    return make_float2(px, py);
+  }
+  // Store the coordinates of p at index idx.
+  __host__ __device__ __forceinline__ void set_point(int idx, const float2 &p) {
+    m_x[idx] = p.x;
+    m_y[idx] = p.y;
+  }
+  // Re-attach the object to the coordinate arrays x and y.
+  __host__ __device__ __forceinline__ void set(float *x, float *y) {
+    m_x = x;
+    m_y = y;
+  }
+};
+
+////////////////////////////////////////////////////////////////////////////////
+// A 2D bounding box
+////////////////////////////////////////////////////////////////////////////////
+class Bounding_box {
+  // Corners of the box with the smallest and the largest coordinates.
+  float2 m_p_min;
+  float2 m_p_max;
+
+ public:
+  // Default box: the unit square, from (0, 0) to (1, 1).
+  __host__ __device__ Bounding_box()
+      : m_p_min(make_float2(0.0f, 0.0f)),
+        m_p_max(make_float2(1.0f, 1.0f)) {}
+
+  // Write the midpoint between the two corners into center.
+  __host__ __device__ void compute_center(float2 &center) const {
+    center.x = (m_p_min.x + m_p_max.x) * 0.5f;
+    center.y = (m_p_min.y + m_p_max.y) * 0.5f;
+  }
+
+  // Corner with the largest coordinates.
+  __host__ __device__ __forceinline__ const float2 &get_max() const {
+    return m_p_max;
+  }
+
+  // Corner with the smallest coordinates.
+  __host__ __device__ __forceinline__ const float2 &get_min() const {
+    return m_p_min;
+  }
+
+  // True when p lies inside the box. The min edges are inclusive, the max
+  // edges are exclusive.
+  __host__ __device__ bool contains(const float2 &p) const {
+    const bool inside_x = p.x >= m_p_min.x && p.x < m_p_max.x;
+    const bool inside_y = p.y >= m_p_min.y && p.y < m_p_max.y;
+    return inside_x && inside_y;
+  }
+
+  // Redefine the box from its corners (min_x, min_y) and (max_x, max_y).
+  __host__ __device__ void set(float min_x, float min_y, float max_x,
+                               float max_y) {
+    m_p_min = make_float2(min_x, min_y);
+    m_p_max = make_float2(max_x, max_y);
+  }
+};
+
+////////////////////////////////////////////////////////////////////////////////
+// A node of a quadtree.
+////////////////////////////////////////////////////////////////////////////////
+class Quadtree_node {
+  // The identifier of the node (its index within its level of the tree).
+  int m_id;
+  // The bounding box covered by this node.
+  Bounding_box m_bounding_box;
+  // The range of points owned by this node: indices m_begin up to, not including, m_end.
+  int m_begin, m_end;
+
+ public:
+  // Constructor. ID 0, an empty point range and the default (unit) bounding box.
+  __host__ __device__ Quadtree_node() : m_id(0), m_begin(0), m_end(0) {}
+
+  // Get the ID of the node at its level.
+  __host__ __device__ int id() const { return m_id; }
+
+  // Set the ID of the node at its level.
+  __host__ __device__ void set_id(int new_id) { m_id = new_id; }
+
+  // Read-only access to the bounding box.
+  __host__ __device__ __forceinline__ const Bounding_box &bounding_box() const {
+    return m_bounding_box;
+  }
+
+  // Set the bounding box from its min corner and its max corner.
+  __host__ __device__ __forceinline__ void set_bounding_box(float min_x,
+                                                            float min_y,
+                                                            float max_x,
+                                                            float max_y) {
+    m_bounding_box.set(min_x, min_y, max_x, max_y);
+  }
+
+  // The number of points in the node (m_end - m_begin).
+  __host__ __device__ __forceinline__ int num_points() const {
+    return m_end - m_begin;
+  }
+
+  // Index of the first point of the node.
+  __host__ __device__ __forceinline__ int points_begin() const {
+    return m_begin;
+  }
+  // Index one past the last point of the node.
+  __host__ __device__ __forceinline__ int points_end() const { return m_end; }
+
+  // Define the range of points of the node: begin is included, end is not.
+  __host__ __device__ __forceinline__ void set_range(int begin, int end) {
+    m_begin = begin;
+    m_end = end;
+  }
+};
+
+////////////////////////////////////////////////////////////////////////////////
+// Algorithm parameters.
+////////////////////////////////////////////////////////////////////////////////
+struct Parameters {
+  // Index (0 or 1) of the buffer in points[] used as input; the other is output.
+  int point_selector;
+  // The number of nodes at a given level (4^k for level k).
+  int num_nodes_at_this_level;
+  // The recursion depth (0 at the root).
+  int depth;
+  // The max value for depth.
+  const int max_depth;
+  // Recursion stops at nodes that hold at most this many points.
+  const int min_points_per_node;
+
+  // Constructor for the root level: buffer 0, a single node, depth 0.
+  __host__ __device__ Parameters(int max_depth, int min_points_per_node)
+      : point_selector(0),
+        num_nodes_at_this_level(1),
+        depth(0),
+        max_depth(max_depth),
+        min_points_per_node(min_points_per_node) {}
+  // Parameters of the next level: other buffer, 4x the nodes, depth + 1. The
+  // bool argument is never read; it only selects this overload.
+  __host__ __device__ Parameters(const Parameters &params, bool)
+      : point_selector((params.point_selector + 1) % 2),
+        num_nodes_at_this_level(4 * params.num_nodes_at_this_level),
+        depth(params.depth + 1),
+        max_depth(params.max_depth),
+        min_points_per_node(params.min_points_per_node) {}
+};
+
+////////////////////////////////////////////////////////////////////////////////
+// Build a quadtree on the GPU. Use CUDA Dynamic Parallelism.
+//
+// The algorithm works as follows. The host (CPU) launches one block of
+// NUM_THREADS_PER_BLOCK threads. That block will do the following steps:
+//
+// 1- Check the number of points and its depth.
+//
+// We impose a maximum depth to the tree and a minimum number of points per
+// node. If the maximum depth is reached or the number of points does not
+// exceed that minimum, the threads in the block exit.
+//
+// Before exiting, they perform a buffer swap if it is needed. Indeed, the
+// algorithm uses two buffers to permute the points and make sure they are
+// properly distributed in the quadtree. By design we want all points to be
+// in the first buffer of points at the end of the algorithm. It is the reason
+// why we may have to swap the buffer before leaving (if the points are in the
+// 2nd buffer).
+//
+// 2- Count the number of points in each child.
+//
+// If the depth is not too high and the number of points is sufficient, the
+// block has to dispatch the points into four geometrical buckets: Its
+// children. For that purpose, we compute the center of the bounding box and
+// count the number of points in each quadrant.
+//
+// The set of points is divided into sections. Each section is given to a
+// warp of threads (32 threads). Warps use the tile ballot() vote and __popc
+// to count the points. See the Programming Guide for more information about
+// those functions.
+//
+// 3- Scan the warps' results to know the "global" numbers.
+//
+// Warps work independently from each other. At the end, each warp knows the
+// number of points in its section. To know the numbers for the block, the
+// block has to run a scan/reduce at the block level. It's a traditional
+// approach. The implementation in that sample is not as optimized as what
+// could be found in fast radix sorts, for example, but it relies on the same
+// idea.
+//
+// 4- Move points.
+//
+// Now that the block knows how many points go in each of its 4 children, it
+// remains to dispatch the points. It is straightforward.
+//
+// 5- Launch new blocks.
+//
+// The block launches four new blocks: one per child. Each of the four blocks
+// will apply the same algorithm.
+////////////////////////////////////////////////////////////////////////////////
+template <int NUM_THREADS_PER_BLOCK>
+__global__ void build_quadtree_kernel(Quadtree_node *nodes, Points *points,
+                                      Parameters params) {
+  // Handle to thread block group
+  cg::thread_block cta = cg::this_thread_block();
+  // The number of warps in a block (step 3 needs between 4 and warpSize warps).
+  const int NUM_WARPS_PER_BLOCK = NUM_THREADS_PER_BLOCK / warpSize;
+
+  // Dynamic shared memory (4 * NUM_WARPS_PER_BLOCK ints) for the point counts.
+  extern __shared__ int smem[];
+
+  // s_num_pts[4][NUM_WARPS_PER_BLOCK];
+  // Row pointers into smem: one row of per-warp counters for each quadrant.
+  volatile int *s_num_pts[4];
+
+  for (int i = 0; i < 4; ++i)
+    s_num_pts[i] = (volatile int *)&smem[i * NUM_WARPS_PER_BLOCK];
+
+  // Compute the coordinates of the threads in the block.
+  const int warp_id = threadIdx.x / warpSize;
+  const int lane_id = threadIdx.x % warpSize;
+  // Mask of the lanes below this one, used for compaction. It is built with
+  // unsigned arithmetic because the signed (1 << 31) - 1 of lane 31 overflows.
+  // Same as: asm( "mov.u32 %0, %%lanemask_lt;" : "=r"(lane_mask_lt) );
+  const unsigned int lane_mask_lt = (1u << lane_id) - 1u;
+
+  // The current node: one block per node, starting at nodes[0].
+  Quadtree_node &node = nodes[blockIdx.x];
+
+  // The number of points in the node.
+  int num_points = node.num_points();
+
+  float2 center;
+  int range_begin, range_end;
+  int warp_cnts[4] = {0, 0, 0, 0};
+  //
+  // 1- Check the number of points and its depth.
+  //
+
+  // Stop the recursion here. Make sure points[0] contains all the points.
+  if (params.depth >= params.max_depth ||
+      num_points <= params.min_points_per_node) {
+    if (params.point_selector == 1) {
+      int it = node.points_begin(), end = node.points_end();
+      // Each thread copies every NUM_THREADS_PER_BLOCK-th point of the node.
+      for (it += threadIdx.x; it < end; it += NUM_THREADS_PER_BLOCK)
+        points[0].set_point(it, points[1].get_point(it));
+    }
+    // The condition is the same for every thread of the block.
+    return;
+  }
+
+  // Compute the center of the bounding box of the points.
+  const Bounding_box &bbox = node.bounding_box();
+
+  bbox.compute_center(center);
+
+  // Find how many points to give to each warp (never less than warpSize).
+  int num_points_per_warp = max(
+      warpSize, (num_points + NUM_WARPS_PER_BLOCK - 1) / NUM_WARPS_PER_BLOCK);
+
+  // Each warp of threads will compute the number of points to move to each
+  // quadrant.
+  range_begin = node.points_begin() + warp_id * num_points_per_warp;
+  range_end = min(range_begin + num_points_per_warp, node.points_end());
+
+  //
+  // 2- Count the number of points in each child.
+  //
+
+  // Input points: the buffer of points[] selected by params.point_selector.
+  const Points &in_points = points[params.point_selector];
+
+  cg::thread_block_tile<32> tile32 = cg::tiled_partition<32>(cta);
+  // Compute the number of points. any() keeps all lanes of the tile iterating.
+  for (int range_it = range_begin + tile32.thread_rank();
+       tile32.any(range_it < range_end); range_it += warpSize) {
+    // Is it still an active thread?
+    bool is_active = range_it < range_end;
+
+    // Load the coordinates of the point.
+    float2 p =
+        is_active ? in_points.get_point(range_it) : make_float2(0.0f, 0.0f);
+
+    // Count top-left points.
+    int num_pts =
+        __popc(tile32.ballot(is_active && p.x < center.x && p.y >= center.y));
+    warp_cnts[0] += tile32.shfl(num_pts, 0);
+
+    // Count top-right points.
+    num_pts =
+        __popc(tile32.ballot(is_active && p.x >= center.x && p.y >= center.y));
+    warp_cnts[1] += tile32.shfl(num_pts, 0);
+
+    // Count bottom-left points.
+    num_pts =
+        __popc(tile32.ballot(is_active && p.x < center.x && p.y < center.y));
+    warp_cnts[2] += tile32.shfl(num_pts, 0);
+
+    // Count bottom-right points.
+    num_pts =
+        __popc(tile32.ballot(is_active && p.x >= center.x && p.y < center.y));
+    warp_cnts[3] += tile32.shfl(num_pts, 0);
+  }
+  // Lane 0 of each warp publishes the four counts of its warp.
+  if (tile32.thread_rank() == 0) {
+    s_num_pts[0][warp_id] = warp_cnts[0];
+    s_num_pts[1][warp_id] = warp_cnts[1];
+    s_num_pts[2][warp_id] = warp_cnts[2];
+    s_num_pts[3][warp_id] = warp_cnts[3];
+  }
+
+  // Make sure warps have finished counting.
+  cg::sync(cta);
+
+  //
+  // 3- Scan the warps' results to know the "global" numbers.
+  //
+  // First 4 warps scan the numbers of points per child (inclusive scan): warp r
+  // scans row r of s_num_pts and lane i holds the count published by warp i.
+  if (warp_id < 4) {
+    int num_pts = tile32.thread_rank() < NUM_WARPS_PER_BLOCK
+                      ? s_num_pts[warp_id][tile32.thread_rank()]
+                      : 0;
+#pragma unroll
+
+    for (int offset = 1; offset < NUM_WARPS_PER_BLOCK; offset *= 2) {
+      int n = tile32.shfl_up(num_pts, offset);
+
+      if (tile32.thread_rank() >= offset) num_pts += n;
+    }
+
+    if (tile32.thread_rank() < NUM_WARPS_PER_BLOCK)
+      s_num_pts[warp_id][tile32.thread_rank()] = num_pts;
+  }
+
+  cg::sync(cta);
+  // Compute global offsets: add the totals of the preceding rows to each row,
+  // which turns the four row scans into one scan over the whole smem array.
+  if (warp_id == 0) {
+    int sum = s_num_pts[0][NUM_WARPS_PER_BLOCK - 1];
+
+    for (int row = 1; row < 4; ++row) {
+      int tmp = s_num_pts[row][NUM_WARPS_PER_BLOCK - 1];
+      cg::sync(tile32);
+
+      if (tile32.thread_rank() < NUM_WARPS_PER_BLOCK)
+        s_num_pts[row][tile32.thread_rank()] += sum;
+
+      cg::sync(tile32);
+      sum += tmp;
+    }
+  }
+
+  cg::sync(cta);
+  // Make the scan exclusive (each entry takes its predecessor's value) and add
+  // the index of the node's first point. Read, synchronize, then write back.
+  int val = 0;
+  if (threadIdx.x < 4 * NUM_WARPS_PER_BLOCK) {
+    val = threadIdx.x == 0 ? 0 : smem[threadIdx.x - 1];
+    val += node.points_begin();
+  }
+
+  cg::sync(cta);
+
+  if (threadIdx.x < 4 * NUM_WARPS_PER_BLOCK) {
+    smem[threadIdx.x] = val;
+  }
+
+  cg::sync(cta);
+
+  //
+  // 4- Move points.
+  // (Step 1 already returned otherwise, so the condition below always holds.)
+  if (!(params.depth >= params.max_depth ||
+        num_points <= params.min_points_per_node)) {
+    // Output points.
+    Points &out_points = points[(params.point_selector + 1) % 2];
+    // Start from the exclusive offsets of this warp, one per quadrant.
+    warp_cnts[0] = s_num_pts[0][warp_id];
+    warp_cnts[1] = s_num_pts[1][warp_id];
+    warp_cnts[2] = s_num_pts[2][warp_id];
+    warp_cnts[3] = s_num_pts[3][warp_id];
+
+    const Points &in_points = points[params.point_selector];
+    // Reorder points.
+    for (int range_it = range_begin + tile32.thread_rank();
+         tile32.any(range_it < range_end); range_it += warpSize) {
+      // Is it still an active thread?
+      bool is_active = range_it < range_end;
+
+      // Load the coordinates of the point.
+      float2 p =
+          is_active ? in_points.get_point(range_it) : make_float2(0.0f, 0.0f);
+
+      // Move top-left points: destination = warp offset + rank among voters.
+      bool pred = is_active && p.x < center.x && p.y >= center.y;
+      unsigned int vote = tile32.ballot(pred);
+      int dest = warp_cnts[0] + __popc(vote & lane_mask_lt);
+
+      if (pred) out_points.set_point(dest, p);
+
+      warp_cnts[0] += tile32.shfl(__popc(vote), 0);
+
+      // Move top-right points.
+      pred = is_active && p.x >= center.x && p.y >= center.y;
+      vote = tile32.ballot(pred);
+      dest = warp_cnts[1] + __popc(vote & lane_mask_lt);
+
+      if (pred) out_points.set_point(dest, p);
+
+      warp_cnts[1] += tile32.shfl(__popc(vote), 0);
+
+      // Move bottom-left points.
+      pred = is_active && p.x < center.x && p.y < center.y;
+      vote = tile32.ballot(pred);
+      dest = warp_cnts[2] + __popc(vote & lane_mask_lt);
+
+      if (pred) out_points.set_point(dest, p);
+
+      warp_cnts[2] += tile32.shfl(__popc(vote), 0);
+
+      // Move bottom-right points.
+      pred = is_active && p.x >= center.x && p.y < center.y;
+      vote = tile32.ballot(pred);
+      dest = warp_cnts[3] + __popc(vote & lane_mask_lt);
+
+      if (pred) out_points.set_point(dest, p);
+
+      warp_cnts[3] += tile32.shfl(__popc(vote), 0);
+    }
+  }
+
+  cg::sync(cta);
+  // warp_cnts now holds the end offsets of this warp's sections; publish them.
+  if (tile32.thread_rank() == 0) {
+    s_num_pts[0][warp_id] = warp_cnts[0];
+    s_num_pts[1][warp_id] = warp_cnts[1];
+    s_num_pts[2][warp_id] = warp_cnts[2];
+    s_num_pts[3][warp_id] = warp_cnts[3];
+  }
+
+  cg::sync(cta);
+
+  //
+  // 5- Launch new blocks.
+  //
+  if (!(params.depth >= params.max_depth ||
+        num_points <= params.min_points_per_node)) {
+    // The last thread launches new blocks.
+    if (threadIdx.x == NUM_THREADS_PER_BLOCK - 1) {
+      // Base of the next level (nodes is node.id() & ~3 entries into this one).
+      Quadtree_node *children =
+          &nodes[params.num_nodes_at_this_level - (node.id() & ~3)];
+
+      // The offsets of the children at their level.
+      int child_offset = 4 * node.id();
+
+      // Set IDs.
+      children[child_offset + 0].set_id(4 * node.id() + 0);
+      children[child_offset + 1].set_id(4 * node.id() + 1);
+      children[child_offset + 2].set_id(4 * node.id() + 2);
+      children[child_offset + 3].set_id(4 * node.id() + 3);
+
+      const Bounding_box &bbox = node.bounding_box();
+      // Points of the bounding-box.
+      const float2 &p_min = bbox.get_min();
+      const float2 &p_max = bbox.get_max();
+
+      // Set the bounding boxes of the children.
+      children[child_offset + 0].set_bounding_box(p_min.x, center.y, center.x,
+                                                  p_max.y);  // Top-left.
+      children[child_offset + 1].set_bounding_box(center.x, center.y, p_max.x,
+                                                  p_max.y);  // Top-right.
+      children[child_offset + 2].set_bounding_box(p_min.x, p_min.y, center.x,
+                                                  center.y);  // Bottom-left.
+      children[child_offset + 3].set_bounding_box(center.x, p_min.y, p_max.x,
+                                                  center.y);  // Bottom-right.
+
+      // Set the ranges of the children.
+      // (This thread is in the last warp, whose end offsets bound the quadrants.)
+      children[child_offset + 0].set_range(node.points_begin(),
+                                           s_num_pts[0][warp_id]);
+      children[child_offset + 1].set_range(s_num_pts[0][warp_id],
+                                           s_num_pts[1][warp_id]);
+      children[child_offset + 2].set_range(s_num_pts[1][warp_id],
+                                           s_num_pts[2][warp_id]);
+      children[child_offset + 3].set_range(s_num_pts[2][warp_id],
+                                           s_num_pts[3][warp_id]);
+
+      // Launch 4 children.
+      build_quadtree_kernel<NUM_THREADS_PER_BLOCK><<<
+          4, NUM_THREADS_PER_BLOCK, 4 * NUM_WARPS_PER_BLOCK * sizeof(int)>>>(
+          &children[child_offset], points, Parameters(params, true));
+    }
+  }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Make sure a Quadtree is properly defined.
+////////////////////////////////////////////////////////////////////////////////
+bool check_quadtree(const Quadtree_node *nodes, int idx, int num_pts,
+                    Points *pts, Parameters params) {
+  const Quadtree_node &node = nodes[idx];
+  const int num_points = node.num_points();
+
+  const bool is_leaf = params.depth == params.max_depth ||
+                       num_points <= params.min_points_per_node;
+
+  if (is_leaf) {
+    // Leaf: every point index must be below num_pts and every point must lie
+    // inside the bounding box of the node.
+    const Bounding_box &bbox = node.bounding_box();
+    for (int it = node.points_begin(); it < node.points_end(); ++it) {
+      if (it >= num_pts) return false;
+      if (!bbox.contains(pts->get_point(it))) return false;
+    }
+
+    return true;
+  }
+
+  // Internal node: its four children are read from the next level, which
+  // starts num_nodes_at_this_level entries after nodes, at 4 * idx + 0..3.
+  const Quadtree_node *next_level = &nodes[params.num_nodes_at_this_level];
+  const int first_child = 4 * idx;
+
+  // The children must hold exactly as many points as their parent.
+  int num_points_in_children = 0;
+  for (int child = 0; child < 4; ++child)
+    num_points_in_children += next_level[first_child + child].num_points();
+
+  if (num_points_in_children != num_points) return false;
+
+  // Check the children in order and stop at the first one that fails.
+  for (int child = 0; child < 4; ++child) {
+    const bool child_ok = check_quadtree(next_level, first_child + child,
+                                         num_pts, pts,
+                                         Parameters(params, true));
+    if (!child_ok) return false;
+  }
+
+  return true;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Parallel random number generator.
+////////////////////////////////////////////////////////////////////////////////
+struct Random_generator {
+  int count;  // Added to the global thread index when seeding on the device.
+
+  __host__ __device__ Random_generator() : count(0) {}
+  __host__ __device__ unsigned int hash(unsigned int a) {  // Bit-mixing hash.
+    a = (a + 0x7ed55d16) + (a << 12);
+    a = (a ^ 0xc761c23c) ^ (a >> 19);
+    a = (a + 0x165667b1) + (a << 5);
+    a = (a + 0xd3a2646c) ^ (a << 9);
+    a = (a + 0xfd7046c5) + (a << 3);
+    a = (a ^ 0xb55a4f09) ^ (a >> 16);
+    return a;
+  }
+  // Seeds a fresh engine on every call and returns two uniform draws from it.
+  __host__ __device__ __forceinline__ thrust::tuple<float, float> operator()() {
+#ifdef __CUDA_ARCH__
+    unsigned seed = hash(blockIdx.x * blockDim.x + threadIdx.x + count);
+    // thrust::generate may call operator() more than once per thread.
+    // Hence, increment count by grid size to ensure uniqueness of seed.
+    count += blockDim.x * gridDim.x;
+#else
+    unsigned seed = hash(0);  // Host path: the seed is the same on every call.
+#endif
+    thrust::default_random_engine rng(seed);
+    thrust::random::uniform_real_distribution<float> distrib;
+    return thrust::make_tuple(distrib(rng), distrib(rng));
+  }
+};
+
+////////////////////////////////////////////////////////////////////////////////
+// Allocate GPU structs, launch kernel and clean up.
+// Fills device vectors with random 2D points, builds the quadtree with a CDP
+// kernel and validates it on the host. warp_size is the device warp size, used
+// to size the kernel's shared memory. Returns true if the tree is valid.
+////////////////////////////////////////////////////////////////////////////////
+bool cdpQuadtree(int warp_size) {
+  // Constants to control the algorithm.
+  const int num_points = 1024;
+  const int max_depth = 8;
+  const int min_points_per_node = 16;
+
+  // Allocate memory for points.
+  thrust::device_vector<float> x_d0(num_points);
+  thrust::device_vector<float> x_d1(num_points);
+  thrust::device_vector<float> y_d0(num_points);
+  thrust::device_vector<float> y_d1(num_points);
+
+  // Generate random points.
+  Random_generator rnd;
+  thrust::generate(
+      thrust::make_zip_iterator(thrust::make_tuple(x_d0.begin(), y_d0.begin())),
+      thrust::make_zip_iterator(thrust::make_tuple(x_d0.end(), y_d0.end())),
+      rnd);
+
+  // Host structures to analyze the device ones.
+  Points points_init[2];
+  points_init[0].set(thrust::raw_pointer_cast(&x_d0[0]),
+                     thrust::raw_pointer_cast(&y_d0[0]));
+  points_init[1].set(thrust::raw_pointer_cast(&x_d1[0]),
+                     thrust::raw_pointer_cast(&y_d1[0]));
+
+  // Allocate memory to store points.
+  Points *points;
+  checkCudaErrors(cudaMalloc((void **)&points, 2 * sizeof(Points)));
+  checkCudaErrors(cudaMemcpy(points, points_init, 2 * sizeof(Points),
+                             cudaMemcpyHostToDevice));
+
+  // Count the nodes of max_depth levels (we could use a closed form...).
+  // NOTE(review): check_quadtree accepts depth == max_depth; verify this.
+  int max_nodes = 0;
+  for (int i = 0, num_nodes_at_level = 1; i < max_depth;
+       ++i, num_nodes_at_level *= 4)
+    max_nodes += num_nodes_at_level;
+
+  // Allocate memory to store the tree.
+  Quadtree_node root;
+  root.set_range(0, num_points);
+  Quadtree_node *nodes;
+  checkCudaErrors(
+      cudaMalloc((void **)&nodes, max_nodes * sizeof(Quadtree_node)));
+  checkCudaErrors(
+      cudaMemcpy(nodes, &root, sizeof(Quadtree_node), cudaMemcpyHostToDevice));
+
+  // We set the recursion limit for CDP to max_depth (checked like other calls).
+  checkCudaErrors(cudaDeviceSetLimit(cudaLimitDevRuntimeSyncDepth, max_depth));
+
+  // Build the quadtree.
+  Parameters params(max_depth, min_points_per_node);
+  std::cout << "Launching CDP kernel to build the quadtree" << std::endl;
+  const int NUM_THREADS_PER_BLOCK = 128;  // Do not use fewer than 128 threads.
+  const int NUM_WARPS_PER_BLOCK = NUM_THREADS_PER_BLOCK / warp_size;
+  const size_t smem_size = 4 * NUM_WARPS_PER_BLOCK * sizeof(int);
+  build_quadtree_kernel<
+      NUM_THREADS_PER_BLOCK><<<1, NUM_THREADS_PER_BLOCK, smem_size>>>(
+      nodes, points, params);
+  checkCudaErrors(cudaGetLastError());
+
+  // Copy points to CPU.
+  thrust::host_vector<float> x_h(x_d0);
+  thrust::host_vector<float> y_h(y_d0);
+  Points host_points;
+  host_points.set(thrust::raw_pointer_cast(&x_h[0]),
+                  thrust::raw_pointer_cast(&y_h[0]));
+
+  // Copy nodes to CPU.
+  Quadtree_node *host_nodes = new Quadtree_node[max_nodes];
+  checkCudaErrors(cudaMemcpy(host_nodes, nodes,
+                             max_nodes * sizeof(Quadtree_node),
+                             cudaMemcpyDeviceToHost));
+
+  // Validate the results.
+  bool ok = check_quadtree(host_nodes, 0, num_points, &host_points, params);
+  std::cout << "Results: " << (ok ? "OK" : "FAILED") << std::endl;
+
+  // Free host and device memory.
+  delete[] host_nodes;
+  checkCudaErrors(cudaFree(nodes));
+  checkCudaErrors(cudaFree(points));
+  return ok;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Main entry point.
+////////////////////////////////////////////////////////////////////////////////
+int main(int argc, char **argv) {
+  // Pick the CUDA device and query its properties.
+  const int dev_id = findCudaDevice(argc, (const char **)argv);
+  cudaDeviceProp props;
+  checkCudaErrors(cudaGetDeviceProperties(&props, dev_id));
+
+  printf("GPU device %s has compute capabilities (SM %d.%d)\n", props.name,
+         props.major, props.minor);
+
+  // The sample needs SM 3.5 or newer (CDP capable); waive it otherwise.
+  const bool supports_cdp =
+      props.major > 3 || (props.major == 3 && props.minor >= 5);
+  if (!supports_cdp) {
+    std::cerr << "cdpQuadTree requires SM 3.5 or higher to use CUDA Dynamic "
+                 "Parallelism.  Exiting...\n"
+              << std::endl;
+    exit(EXIT_WAIVED);
+  }
+
+  // Build and validate the quadtree; the outcome becomes the exit status.
+  const bool passed = cdpQuadtree(props.warpSize);
+  return passed ? EXIT_SUCCESS : EXIT_FAILURE;
+}
diff --git a/Samples/cdpQuadtree/cdpQuadtree_vs2017.sln b/Samples/cdpQuadtree/cdpQuadtree_vs2017.sln
new file mode 100644
index 00000000..e4f10db0
--- /dev/null
+++ b/Samples/cdpQuadtree/cdpQuadtree_vs2017.sln
@@ -0,0 +1,20 @@
+﻿
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio 2017
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cdpQuadtree", "cdpQuadtree_vs2017.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
+EndProject
+Global
+	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		Debug|x64 = Debug|x64
+		Release|x64 = Release|x64
+	EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
+		{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
+		{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
+		{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
+	EndGlobalSection
+	GlobalSection(SolutionProperties) = preSolution
+		HideSolutionNode = FALSE
+	EndGlobalSection
+EndGlobal
diff --git a/Samples/cdpQuadtree/cdpQuadtree_vs2017.vcxproj b/Samples/cdpQuadtree/cdpQuadtree_vs2017.vcxproj
new file mode 100644
index 00000000..162371e0
--- /dev/null
+++ b/Samples/cdpQuadtree/cdpQuadtree_vs2017.vcxproj
@@ -0,0 +1,114 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <PropertyGroup>
+    <CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
+  </PropertyGroup>
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
+    <RootNamespace>cdpQuadtree_vs2017</RootNamespace>
+    <ProjectName>cdpQuadtree</ProjectName>
+    <CudaToolkitCustomDir />
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(WindowsTargetPlatformVersion)'==''">
+    <LatestTargetPlatformVersion>$([Microsoft.Build.Utilities.ToolLocationHelper]::GetLatestSDKTargetPlatformVersion('Windows', '10.0'))</LatestTargetPlatformVersion>
+    <WindowsTargetPlatformVersion Condition="'$(WindowsTargetPlatformVersion)' == ''">$(LatestTargetPlatformVersion)</WindowsTargetPlatformVersion>
+    <TargetPlatformVersion>$(WindowsTargetPlatformVersion)</TargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup>
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>MultiByte</CharacterSet>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)'=='Debug'">
+    <UseDebugLibraries>true</UseDebugLibraries>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)'=='Release'">
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets">
+    <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <IntDir>$(Platform)/$(Configuration)/</IntDir>
+    <IncludePath>$(IncludePath)</IncludePath>
+    <CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules />
+    <CodeAnalysisRuleAssemblies />
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Platform)'=='x64'">
+    <OutDir>../../bin/win64/$(Configuration)/</OutDir>
+  </PropertyGroup>
+  <ItemDefinitionGroup>
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../Common;$(CudaToolkitIncludeDir);</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <AdditionalDependencies>cudadevrt.lib;cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
+      <OutputFile>$(OutDir)/cdpQuadtree.exe</OutputFile>
+    </Link>
+    <CudaCompile>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
+      <Include>./;../../Common</Include>
+      <Defines>WIN32</Defines>
+      <GenerateRelocatableDeviceCode>true</GenerateRelocatableDeviceCode>
+      <AdditionalOptions>--threads 0 %(AdditionalOptions)</AdditionalOptions> <!-- append: keeps the /wd 4819 option set above -->
+    </CudaCompile>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
+    </Link>
+    <CudaCompile>
+      <Runtime>MTd</Runtime>
+      <TargetMachinePlatform>64</TargetMachinePlatform>
+    </CudaCompile>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>false</GenerateDebugInformation>
+      <LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
+    </Link>
+    <CudaCompile>
+      <Runtime>MT</Runtime>
+      <TargetMachinePlatform>64</TargetMachinePlatform>
+    </CudaCompile>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <CudaCompile Include="cdpQuadtree.cu" />
+
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
+  </ImportGroup>
+</Project>
diff --git a/Samples/cdpQuadtree/cdpQuadtree_vs2019.sln b/Samples/cdpQuadtree/cdpQuadtree_vs2019.sln
new file mode 100644
index 00000000..beafe616
--- /dev/null
+++ b/Samples/cdpQuadtree/cdpQuadtree_vs2019.sln
@@ -0,0 +1,20 @@
+﻿
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio 2019
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cdpQuadtree", "cdpQuadtree_vs2019.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
+EndProject
+Global
+	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		Debug|x64 = Debug|x64
+		Release|x64 = Release|x64
+	EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
+		{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
+		{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
+		{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
+	EndGlobalSection
+	GlobalSection(SolutionProperties) = preSolution
+		HideSolutionNode = FALSE
+	EndGlobalSection
+EndGlobal
diff --git a/Samples/cdpQuadtree/cdpQuadtree_vs2019.vcxproj b/Samples/cdpQuadtree/cdpQuadtree_vs2019.vcxproj
new file mode 100644
index 00000000..bb56716b
--- /dev/null
+++ b/Samples/cdpQuadtree/cdpQuadtree_vs2019.vcxproj
@@ -0,0 +1,110 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <PropertyGroup>
+    <CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
+  </PropertyGroup>
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
+    <RootNamespace>cdpQuadtree_vs2019</RootNamespace>
+    <ProjectName>cdpQuadtree</ProjectName>
+    <CudaToolkitCustomDir />
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup>
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>MultiByte</CharacterSet>
+    <PlatformToolset>v142</PlatformToolset>
+    <WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)'=='Debug'">
+    <UseDebugLibraries>true</UseDebugLibraries>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)'=='Release'">
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets">
+    <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <IntDir>$(Platform)/$(Configuration)/</IntDir>
+    <IncludePath>$(IncludePath)</IncludePath>
+    <CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
+    <CodeAnalysisRules />
+    <CodeAnalysisRuleAssemblies />
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Platform)'=='x64'">
+    <OutDir>../../bin/win64/$(Configuration)/</OutDir>
+  </PropertyGroup>
+  <ItemDefinitionGroup>
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../Common;$(CudaToolkitIncludeDir);</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <AdditionalDependencies>cudadevrt.lib;cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
+      <OutputFile>$(OutDir)/cdpQuadtree.exe</OutputFile>
+    </Link>
+    <CudaCompile>
+      <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
+      <AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
+      <Include>./;../../Common</Include>
+      <Defines>WIN32</Defines>
+      <GenerateRelocatableDeviceCode>true</GenerateRelocatableDeviceCode>
+      <AdditionalOptions>--threads 0 %(AdditionalOptions)</AdditionalOptions> <!-- append: keeps the /wd 4819 option set above -->
+    </CudaCompile>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
+    </Link>
+    <CudaCompile>
+      <Runtime>MTd</Runtime>
+      <TargetMachinePlatform>64</TargetMachinePlatform>
+    </CudaCompile>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
+    <ClCompile>
+      <Optimization>MaxSpeed</Optimization>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>false</GenerateDebugInformation>
+      <LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
+    </Link>
+    <CudaCompile>
+      <Runtime>MT</Runtime>
+      <TargetMachinePlatform>64</TargetMachinePlatform>
+    </CudaCompile>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <CudaCompile Include="cdpQuadtree.cu" />
+
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
+  </ImportGroup>
+</Project>
diff --git a/Samples/concurrentKernels/README.md b/Samples/concurrentKernels/README.md
index b3a52d91..0bd5287f 100644
--- a/Samples/concurrentKernels/README.md
+++ b/Samples/concurrentKernels/README.md
@@ -24,7 +24,7 @@ x86_64, ppc64le, armv7l
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.3](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.4](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/concurrentKernels/concurrentKernels_vs2017.vcxproj b/Samples/concurrentKernels/concurrentKernels_vs2017.vcxproj
index f8036198..9c014c79 100644
--- a/Samples/concurrentKernels/concurrentKernels_vs2017.vcxproj
+++ b/Samples/concurrentKernels/concurrentKernels_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/concurrentKernels/concurrentKernels_vs2019.vcxproj b/Samples/concurrentKernels/concurrentKernels_vs2019.vcxproj
index f6224739..63ace459 100644
--- a/Samples/concurrentKernels/concurrentKernels_vs2019.vcxproj
+++ b/Samples/concurrentKernels/concurrentKernels_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/conjugateGradientCudaGraphs/Makefile b/Samples/conjugateGradientCudaGraphs/Makefile
index 6609440a..13d1e4ee 100644
--- a/Samples/conjugateGradientCudaGraphs/Makefile
+++ b/Samples/conjugateGradientCudaGraphs/Makefile
@@ -265,12 +265,6 @@ ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS))
 
 SAMPLE_ENABLED := 1
 
-# This sample is not supported on QNX
-ifeq ($(TARGET_OS),qnx)
-  $(info >>> WARNING - conjugateGradientCudaGraphs is not supported on QNX - waiving sample <<<)
-  SAMPLE_ENABLED := 0
-endif
-
 ALL_LDFLAGS :=
 ALL_LDFLAGS += $(ALL_CCFLAGS)
 ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
diff --git a/Samples/conjugateGradientCudaGraphs/README.md b/Samples/conjugateGradientCudaGraphs/README.md
index 1e723476..beebe833 100644
--- a/Samples/conjugateGradientCudaGraphs/README.md
+++ b/Samples/conjugateGradientCudaGraphs/README.md
@@ -30,7 +30,7 @@ cudaStreamBeginCapture, cudaStreamEndCapture, cudaGraphCreate, cudaGraphLaunch,
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.3](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.4](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/conjugateGradientCudaGraphs/conjugateGradientCudaGraphs_vs2017.vcxproj b/Samples/conjugateGradientCudaGraphs/conjugateGradientCudaGraphs_vs2017.vcxproj
index a662b455..9b9eebd9 100644
--- a/Samples/conjugateGradientCudaGraphs/conjugateGradientCudaGraphs_vs2017.vcxproj
+++ b/Samples/conjugateGradientCudaGraphs/conjugateGradientCudaGraphs_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/conjugateGradientCudaGraphs/conjugateGradientCudaGraphs_vs2019.vcxproj b/Samples/conjugateGradientCudaGraphs/conjugateGradientCudaGraphs_vs2019.vcxproj
index 5fe964d8..20595779 100644
--- a/Samples/conjugateGradientCudaGraphs/conjugateGradientCudaGraphs_vs2019.vcxproj
+++ b/Samples/conjugateGradientCudaGraphs/conjugateGradientCudaGraphs_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/conjugateGradientMultiBlockCG/README.md b/Samples/conjugateGradientMultiBlockCG/README.md
index 217fabf3..d2b76ee8 100644
--- a/Samples/conjugateGradientMultiBlockCG/README.md
+++ b/Samples/conjugateGradientMultiBlockCG/README.md
@@ -27,7 +27,7 @@ x86_64, ppc64le, aarch64
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.3](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.4](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/conjugateGradientMultiBlockCG/conjugateGradientMultiBlockCG_vs2017.vcxproj b/Samples/conjugateGradientMultiBlockCG/conjugateGradientMultiBlockCG_vs2017.vcxproj
index 9692e5fe..137acb27 100644
--- a/Samples/conjugateGradientMultiBlockCG/conjugateGradientMultiBlockCG_vs2017.vcxproj
+++ b/Samples/conjugateGradientMultiBlockCG/conjugateGradientMultiBlockCG_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/conjugateGradientMultiBlockCG/conjugateGradientMultiBlockCG_vs2019.vcxproj b/Samples/conjugateGradientMultiBlockCG/conjugateGradientMultiBlockCG_vs2019.vcxproj
index 9952e93f..0f7f669f 100644
--- a/Samples/conjugateGradientMultiBlockCG/conjugateGradientMultiBlockCG_vs2019.vcxproj
+++ b/Samples/conjugateGradientMultiBlockCG/conjugateGradientMultiBlockCG_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/conjugateGradientMultiDeviceCG/README.md b/Samples/conjugateGradientMultiDeviceCG/README.md
index 099a61bd..fbf8b563 100644
--- a/Samples/conjugateGradientMultiDeviceCG/README.md
+++ b/Samples/conjugateGradientMultiDeviceCG/README.md
@@ -30,7 +30,7 @@ cudaMemAdvise, cudaMemPrefetchAsync, cudaLaunchCooperativeKernelMultiDevice, cud
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.3](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.4](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG_vs2017.vcxproj b/Samples/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG_vs2017.vcxproj
index 281b9f54..d5f0924e 100644
--- a/Samples/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG_vs2017.vcxproj
+++ b/Samples/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -109,6 +109,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG_vs2019.vcxproj b/Samples/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG_vs2019.vcxproj
index da03363e..68964bc3 100644
--- a/Samples/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG_vs2019.vcxproj
+++ b/Samples/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/cuSolverDn_LinearSolver/Makefile b/Samples/cuSolverDn_LinearSolver/Makefile
index 61e55f47..f8b34a31 100644
--- a/Samples/cuSolverDn_LinearSolver/Makefile
+++ b/Samples/cuSolverDn_LinearSolver/Makefile
@@ -271,12 +271,6 @@ ifeq ($(TARGET_ARCH),armv7l)
   SAMPLE_ENABLED := 0
 endif
 
-# This sample is not supported on QNX
-ifeq ($(TARGET_OS),qnx)
-  $(info >>> WARNING - cuSolverDn_LinearSolver is not supported on QNX - waiving sample <<<)
-  SAMPLE_ENABLED := 0
-endif
-
 ifeq ($(TARGET_OS),linux)
 ALL_CCFLAGS += -Xcompiler \"-Wl,--no-as-needed\"
 endif
diff --git a/Samples/cuSolverDn_LinearSolver/README.md b/Samples/cuSolverDn_LinearSolver/README.md
index 185f577b..84190e53 100644
--- a/Samples/cuSolverDn_LinearSolver/README.md
+++ b/Samples/cuSolverDn_LinearSolver/README.md
@@ -27,7 +27,7 @@ x86_64, ppc64le, aarch64
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.3](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.4](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/cuSolverDn_LinearSolver/cuSolverDn_LinearSolver_vs2017.vcxproj b/Samples/cuSolverDn_LinearSolver/cuSolverDn_LinearSolver_vs2017.vcxproj
index 8d77015c..b725348d 100644
--- a/Samples/cuSolverDn_LinearSolver/cuSolverDn_LinearSolver_vs2017.vcxproj
+++ b/Samples/cuSolverDn_LinearSolver/cuSolverDn_LinearSolver_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -110,6 +110,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/cuSolverDn_LinearSolver/cuSolverDn_LinearSolver_vs2019.vcxproj b/Samples/cuSolverDn_LinearSolver/cuSolverDn_LinearSolver_vs2019.vcxproj
index d3f1e05e..ffdac8c8 100644
--- a/Samples/cuSolverDn_LinearSolver/cuSolverDn_LinearSolver_vs2019.vcxproj
+++ b/Samples/cuSolverDn_LinearSolver/cuSolverDn_LinearSolver_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -106,6 +106,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/cuSolverSp_LinearSolver/Makefile b/Samples/cuSolverSp_LinearSolver/Makefile
index cc002581..59a043d9 100644
--- a/Samples/cuSolverSp_LinearSolver/Makefile
+++ b/Samples/cuSolverSp_LinearSolver/Makefile
@@ -265,12 +265,6 @@ ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS))
 
 SAMPLE_ENABLED := 1
 
-# This sample is not supported on QNX
-ifeq ($(TARGET_OS),qnx)
-  $(info >>> WARNING - cuSolverSp_LinearSolver is not supported on QNX - waiving sample <<<)
-  SAMPLE_ENABLED := 0
-endif
-
 ifeq ($(TARGET_OS),linux)
 ALL_CCFLAGS += -Xcompiler \"-Wl,--no-as-needed\"
 endif
diff --git a/Samples/cuSolverSp_LinearSolver/README.md b/Samples/cuSolverSp_LinearSolver/README.md
index 35c105ee..fd506b35 100644
--- a/Samples/cuSolverSp_LinearSolver/README.md
+++ b/Samples/cuSolverSp_LinearSolver/README.md
@@ -27,7 +27,7 @@ x86_64, ppc64le, armv7l
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.3](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.4](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver_vs2017.vcxproj b/Samples/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver_vs2017.vcxproj
index 1bdf5779..25e29047 100644
--- a/Samples/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver_vs2017.vcxproj
+++ b/Samples/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -110,6 +110,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver_vs2019.vcxproj b/Samples/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver_vs2019.vcxproj
index 665f795e..05e8ddf0 100644
--- a/Samples/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver_vs2019.vcxproj
+++ b/Samples/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -106,6 +106,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/cudaCompressibleMemory/README.md b/Samples/cudaCompressibleMemory/README.md
index 6492f737..82a77278 100644
--- a/Samples/cudaCompressibleMemory/README.md
+++ b/Samples/cudaCompressibleMemory/README.md
@@ -30,7 +30,7 @@ cudaMalloc, cudaFree
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.3](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.4](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/cudaCompressibleMemory/cudaCompressibleMemory_vs2017.vcxproj b/Samples/cudaCompressibleMemory/cudaCompressibleMemory_vs2017.vcxproj
index 4f450a2c..a420c541 100644
--- a/Samples/cudaCompressibleMemory/cudaCompressibleMemory_vs2017.vcxproj
+++ b/Samples/cudaCompressibleMemory/cudaCompressibleMemory_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -109,6 +109,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/cudaCompressibleMemory/cudaCompressibleMemory_vs2019.vcxproj b/Samples/cudaCompressibleMemory/cudaCompressibleMemory_vs2019.vcxproj
index a932dd47..1be97e5e 100644
--- a/Samples/cudaCompressibleMemory/cudaCompressibleMemory_vs2019.vcxproj
+++ b/Samples/cudaCompressibleMemory/cudaCompressibleMemory_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/cudaNvSci/Makefile b/Samples/cudaNvSci/Makefile
index d7db232f..1ef041a8 100644
--- a/Samples/cudaNvSci/Makefile
+++ b/Samples/cudaNvSci/Makefile
@@ -279,12 +279,6 @@ ifeq ($(TARGET_ARCH),armv7l)
   SAMPLE_ENABLED := 0
 endif
 
-# This sample is not supported on QNX
-ifeq ($(TARGET_OS),qnx)
-  $(info >>> WARNING - cudaNvSci is not supported on QNX - waiving sample <<<)
-  SAMPLE_ENABLED := 0
-endif
-
 ALL_LDFLAGS :=
 ALL_LDFLAGS += $(ALL_CCFLAGS)
 ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
diff --git a/Samples/cudaNvSci/README.md b/Samples/cudaNvSci/README.md
index 58d95d19..030c8ad8 100644
--- a/Samples/cudaNvSci/README.md
+++ b/Samples/cudaNvSci/README.md
@@ -30,7 +30,7 @@ cudaImportExternalMemory, cudaExternalMemoryGetMappedBuffer, cudaExternalMemoryG
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.3](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.4](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/cudaNvSciNvMedia/README.md b/Samples/cudaNvSciNvMedia/README.md
index a8e1a41c..1c009532 100644
--- a/Samples/cudaNvSciNvMedia/README.md
+++ b/Samples/cudaNvSciNvMedia/README.md
@@ -30,7 +30,7 @@ cudaImportExternalMemory, cudaExternalMemoryGetMappedBuffer, cudaExternalMemoryG
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.3](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.4](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/cudaOpenMP/README.md b/Samples/cudaOpenMP/README.md
index c2f88493..3d6fe64f 100644
--- a/Samples/cudaOpenMP/README.md
+++ b/Samples/cudaOpenMP/README.md
@@ -30,7 +30,7 @@ cudaMalloc, cudaFree, cudaMemcpy
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.3](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.4](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/cudaOpenMP/cudaOpenMP_vs2017.vcxproj b/Samples/cudaOpenMP/cudaOpenMP_vs2017.vcxproj
index d3a04a75..42226124 100644
--- a/Samples/cudaOpenMP/cudaOpenMP_vs2017.vcxproj
+++ b/Samples/cudaOpenMP/cudaOpenMP_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -109,6 +109,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/cudaOpenMP/cudaOpenMP_vs2019.vcxproj b/Samples/cudaOpenMP/cudaOpenMP_vs2019.vcxproj
index 59018165..a12a7d9e 100644
--- a/Samples/cudaOpenMP/cudaOpenMP_vs2019.vcxproj
+++ b/Samples/cudaOpenMP/cudaOpenMP_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/cudaTensorCoreGemm/README.md b/Samples/cudaTensorCoreGemm/README.md
index 502941ae..3f88fe67 100644
--- a/Samples/cudaTensorCoreGemm/README.md
+++ b/Samples/cudaTensorCoreGemm/README.md
@@ -31,7 +31,7 @@ cudaMallocManaged, cudaDeviceSynchronize, cudaFuncSetAttribute, cudaEventCreate,
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.3](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.4](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/cudaTensorCoreGemm/cudaTensorCoreGemm_vs2017.vcxproj b/Samples/cudaTensorCoreGemm/cudaTensorCoreGemm_vs2017.vcxproj
index 622ffe8f..7f7f17ce 100644
--- a/Samples/cudaTensorCoreGemm/cudaTensorCoreGemm_vs2017.vcxproj
+++ b/Samples/cudaTensorCoreGemm/cudaTensorCoreGemm_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/cudaTensorCoreGemm/cudaTensorCoreGemm_vs2019.vcxproj b/Samples/cudaTensorCoreGemm/cudaTensorCoreGemm_vs2019.vcxproj
index fb649d4c..42487257 100644
--- a/Samples/cudaTensorCoreGemm/cudaTensorCoreGemm_vs2019.vcxproj
+++ b/Samples/cudaTensorCoreGemm/cudaTensorCoreGemm_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/deviceQuery/README.md b/Samples/deviceQuery/README.md
index 76fb08d5..dbd8e3da 100644
--- a/Samples/deviceQuery/README.md
+++ b/Samples/deviceQuery/README.md
@@ -27,7 +27,7 @@ cudaSetDevice, cudaGetDeviceCount, cudaGetDeviceProperties, cudaDriverGetVersion
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.3](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.4](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/deviceQuery/deviceQuery_vs2017.vcxproj b/Samples/deviceQuery/deviceQuery_vs2017.vcxproj
index 5bd56297..0013948e 100644
--- a/Samples/deviceQuery/deviceQuery_vs2017.vcxproj
+++ b/Samples/deviceQuery/deviceQuery_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/deviceQuery/deviceQuery_vs2019.vcxproj b/Samples/deviceQuery/deviceQuery_vs2019.vcxproj
index f8532544..8a797564 100644
--- a/Samples/deviceQuery/deviceQuery_vs2019.vcxproj
+++ b/Samples/deviceQuery/deviceQuery_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/dmmaTensorCoreGemm/README.md b/Samples/dmmaTensorCoreGemm/README.md
index aa6e6f16..f157b6f1 100644
--- a/Samples/dmmaTensorCoreGemm/README.md
+++ b/Samples/dmmaTensorCoreGemm/README.md
@@ -27,7 +27,7 @@ cudaMallocManaged, cudaDeviceSynchronize, cudaFuncSetAttribute, cudaEventCreate,
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.3](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.4](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/dmmaTensorCoreGemm/dmmaTensorCoreGemm_vs2017.vcxproj b/Samples/dmmaTensorCoreGemm/dmmaTensorCoreGemm_vs2017.vcxproj
index 5ea929c1..19d1e7ab 100644
--- a/Samples/dmmaTensorCoreGemm/dmmaTensorCoreGemm_vs2017.vcxproj
+++ b/Samples/dmmaTensorCoreGemm/dmmaTensorCoreGemm_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/dmmaTensorCoreGemm/dmmaTensorCoreGemm_vs2019.vcxproj b/Samples/dmmaTensorCoreGemm/dmmaTensorCoreGemm_vs2019.vcxproj
index b415db92..7423334b 100644
--- a/Samples/dmmaTensorCoreGemm/dmmaTensorCoreGemm_vs2019.vcxproj
+++ b/Samples/dmmaTensorCoreGemm/dmmaTensorCoreGemm_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/globalToShmemAsyncCopy/README.md b/Samples/globalToShmemAsyncCopy/README.md
index 233d5b50..1646dc0d 100644
--- a/Samples/globalToShmemAsyncCopy/README.md
+++ b/Samples/globalToShmemAsyncCopy/README.md
@@ -30,7 +30,7 @@ cudaEventCreate, cudaEventRecord, cudaEventQuery, cudaEventDestroy, cudaEventEla
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.3](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.4](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/globalToShmemAsyncCopy/globalToShmemAsyncCopy_vs2017.vcxproj b/Samples/globalToShmemAsyncCopy/globalToShmemAsyncCopy_vs2017.vcxproj
index added1d2..ef431b2f 100644
--- a/Samples/globalToShmemAsyncCopy/globalToShmemAsyncCopy_vs2017.vcxproj
+++ b/Samples/globalToShmemAsyncCopy/globalToShmemAsyncCopy_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/globalToShmemAsyncCopy/globalToShmemAsyncCopy_vs2019.vcxproj b/Samples/globalToShmemAsyncCopy/globalToShmemAsyncCopy_vs2019.vcxproj
index bf65f63a..b506e825 100644
--- a/Samples/globalToShmemAsyncCopy/globalToShmemAsyncCopy_vs2019.vcxproj
+++ b/Samples/globalToShmemAsyncCopy/globalToShmemAsyncCopy_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/immaTensorCoreGemm/README.md b/Samples/immaTensorCoreGemm/README.md
index 3c07bb95..69655ee6 100644
--- a/Samples/immaTensorCoreGemm/README.md
+++ b/Samples/immaTensorCoreGemm/README.md
@@ -27,7 +27,7 @@ cudaMallocManaged, cudaDeviceSynchronize, cudaFuncSetAttribute, cudaEventCreate,
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.3](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.4](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/immaTensorCoreGemm/immaTensorCoreGemm_vs2017.vcxproj b/Samples/immaTensorCoreGemm/immaTensorCoreGemm_vs2017.vcxproj
index d6942bc2..b6b86157 100644
--- a/Samples/immaTensorCoreGemm/immaTensorCoreGemm_vs2017.vcxproj
+++ b/Samples/immaTensorCoreGemm/immaTensorCoreGemm_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/immaTensorCoreGemm/immaTensorCoreGemm_vs2019.vcxproj b/Samples/immaTensorCoreGemm/immaTensorCoreGemm_vs2019.vcxproj
index 6ecb5d5f..98e76bba 100644
--- a/Samples/immaTensorCoreGemm/immaTensorCoreGemm_vs2019.vcxproj
+++ b/Samples/immaTensorCoreGemm/immaTensorCoreGemm_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/jacobiCudaGraphs/README.md b/Samples/jacobiCudaGraphs/README.md
index c6223ff2..591325fc 100644
--- a/Samples/jacobiCudaGraphs/README.md
+++ b/Samples/jacobiCudaGraphs/README.md
@@ -25,7 +25,7 @@ cudaStreamBeginCapture, cudaStreamEndCapture, cudaGraphCreate, cudaGraphLaunch,
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.3](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.4](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/jacobiCudaGraphs/jacobiCudaGraphs_vs2017.vcxproj b/Samples/jacobiCudaGraphs/jacobiCudaGraphs_vs2017.vcxproj
index c899fc38..f077e41b 100644
--- a/Samples/jacobiCudaGraphs/jacobiCudaGraphs_vs2017.vcxproj
+++ b/Samples/jacobiCudaGraphs/jacobiCudaGraphs_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -109,6 +109,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/jacobiCudaGraphs/jacobiCudaGraphs_vs2019.vcxproj b/Samples/jacobiCudaGraphs/jacobiCudaGraphs_vs2019.vcxproj
index c6158ebd..438f463e 100644
--- a/Samples/jacobiCudaGraphs/jacobiCudaGraphs_vs2019.vcxproj
+++ b/Samples/jacobiCudaGraphs/jacobiCudaGraphs_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/matrixMul/README.md b/Samples/matrixMul/README.md
index 5d9dba69..bb0e280e 100644
--- a/Samples/matrixMul/README.md
+++ b/Samples/matrixMul/README.md
@@ -27,7 +27,7 @@ cudaEventCreate, cudaEventRecord, cudaEventQuery, cudaEventDestroy, cudaEventEla
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.3](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.4](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/matrixMul/matrixMul_vs2017.vcxproj b/Samples/matrixMul/matrixMul_vs2017.vcxproj
index c362684f..3e4e5864 100644
--- a/Samples/matrixMul/matrixMul_vs2017.vcxproj
+++ b/Samples/matrixMul/matrixMul_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/matrixMul/matrixMul_vs2019.vcxproj b/Samples/matrixMul/matrixMul_vs2019.vcxproj
index 084d32b0..dc1a1886 100644
--- a/Samples/matrixMul/matrixMul_vs2019.vcxproj
+++ b/Samples/matrixMul/matrixMul_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/matrixMulDrv/README.md b/Samples/matrixMulDrv/README.md
index 248b61c5..a604f373 100644
--- a/Samples/matrixMulDrv/README.md
+++ b/Samples/matrixMulDrv/README.md
@@ -27,7 +27,7 @@ cuModuleLoad, cuModuleLoadDataEx, cuModuleGetFunction, cuMemAlloc, cuMemFree, cu
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.3](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.4](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/matrixMulDrv/matrixMulDrv_vs2017.vcxproj b/Samples/matrixMulDrv/matrixMulDrv_vs2017.vcxproj
index 6360c07c..8d95104b 100644
--- a/Samples/matrixMulDrv/matrixMulDrv_vs2017.vcxproj
+++ b/Samples/matrixMulDrv/matrixMulDrv_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -112,6 +112,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/matrixMulDrv/matrixMulDrv_vs2019.vcxproj b/Samples/matrixMulDrv/matrixMulDrv_vs2019.vcxproj
index 69a91d3c..4e4254ce 100644
--- a/Samples/matrixMulDrv/matrixMulDrv_vs2019.vcxproj
+++ b/Samples/matrixMulDrv/matrixMulDrv_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/memMapIPCDrv/README.md b/Samples/memMapIPCDrv/README.md
index 1e343fd1..64347638 100644
--- a/Samples/memMapIPCDrv/README.md
+++ b/Samples/memMapIPCDrv/README.md
@@ -30,7 +30,7 @@ cuModuleLoad, cuModuleLoadDataEx, cuModuleGetFunction, cuLaunchKernel, cuMemcpyD
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.3](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.4](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/memMapIPCDrv/memMapIPCDrv_vs2017.vcxproj b/Samples/memMapIPCDrv/memMapIPCDrv_vs2017.vcxproj
index 86d80be6..48555b9a 100644
--- a/Samples/memMapIPCDrv/memMapIPCDrv_vs2017.vcxproj
+++ b/Samples/memMapIPCDrv/memMapIPCDrv_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -113,6 +113,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/memMapIPCDrv/memMapIPCDrv_vs2019.vcxproj b/Samples/memMapIPCDrv/memMapIPCDrv_vs2019.vcxproj
index 3c928e83..4873be62 100644
--- a/Samples/memMapIPCDrv/memMapIPCDrv_vs2019.vcxproj
+++ b/Samples/memMapIPCDrv/memMapIPCDrv_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -109,6 +109,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/nvJPEG/Makefile b/Samples/nvJPEG/Makefile
index f3515c78..d8c228df 100644
--- a/Samples/nvJPEG/Makefile
+++ b/Samples/nvJPEG/Makefile
@@ -277,12 +277,6 @@ ifeq ($(TARGET_ARCH),armv7l)
   SAMPLE_ENABLED := 0
 endif
 
-# This sample is not supported on QNX
-ifeq ($(TARGET_OS),qnx)
-  $(info >>> WARNING - nvJPEG is not supported on QNX - waiving sample <<<)
-  SAMPLE_ENABLED := 0
-endif
-
 ALL_LDFLAGS :=
 ALL_LDFLAGS += $(ALL_CCFLAGS)
 ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
diff --git a/Samples/nvJPEG/README.md b/Samples/nvJPEG/README.md
index 53c1b60d..4b67d66c 100644
--- a/Samples/nvJPEG/README.md
+++ b/Samples/nvJPEG/README.md
@@ -25,7 +25,7 @@ x86_64, ppc64le, aarch64
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.3](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.4](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/nvJPEG/nvJPEG_vs2017.vcxproj b/Samples/nvJPEG/nvJPEG_vs2017.vcxproj
index 7d16e568..cb425c51 100644
--- a/Samples/nvJPEG/nvJPEG_vs2017.vcxproj
+++ b/Samples/nvJPEG/nvJPEG_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/nvJPEG/nvJPEG_vs2019.vcxproj b/Samples/nvJPEG/nvJPEG_vs2019.vcxproj
index 378b9198..f3ef157d 100644
--- a/Samples/nvJPEG/nvJPEG_vs2019.vcxproj
+++ b/Samples/nvJPEG/nvJPEG_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/nvJPEG_encoder/Makefile b/Samples/nvJPEG_encoder/Makefile
index da0b82b7..05228d1d 100644
--- a/Samples/nvJPEG_encoder/Makefile
+++ b/Samples/nvJPEG_encoder/Makefile
@@ -277,12 +277,6 @@ ifeq ($(TARGET_ARCH),armv7l)
   SAMPLE_ENABLED := 0
 endif
 
-# This sample is not supported on QNX
-ifeq ($(TARGET_OS),qnx)
-  $(info >>> WARNING - nvJPEG_encoder is not supported on QNX - waiving sample <<<)
-  SAMPLE_ENABLED := 0
-endif
-
 ALL_LDFLAGS :=
 ALL_LDFLAGS += $(ALL_CCFLAGS)
 ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
diff --git a/Samples/nvJPEG_encoder/README.md b/Samples/nvJPEG_encoder/README.md
index 40f092b3..9cf7266a 100644
--- a/Samples/nvJPEG_encoder/README.md
+++ b/Samples/nvJPEG_encoder/README.md
@@ -25,7 +25,7 @@ x86_64, ppc64le, aarch64
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.3](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.4](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/nvJPEG_encoder/nvJPEG_encoder_vs2017.vcxproj b/Samples/nvJPEG_encoder/nvJPEG_encoder_vs2017.vcxproj
index 765f1f35..f81ee359 100644
--- a/Samples/nvJPEG_encoder/nvJPEG_encoder_vs2017.vcxproj
+++ b/Samples/nvJPEG_encoder/nvJPEG_encoder_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/nvJPEG_encoder/nvJPEG_encoder_vs2019.vcxproj b/Samples/nvJPEG_encoder/nvJPEG_encoder_vs2019.vcxproj
index 76fcec11..b4801b00 100644
--- a/Samples/nvJPEG_encoder/nvJPEG_encoder_vs2019.vcxproj
+++ b/Samples/nvJPEG_encoder/nvJPEG_encoder_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/p2pBandwidthLatencyTest/README.md b/Samples/p2pBandwidthLatencyTest/README.md
index ab1f4685..0399afcf 100644
--- a/Samples/p2pBandwidthLatencyTest/README.md
+++ b/Samples/p2pBandwidthLatencyTest/README.md
@@ -27,7 +27,7 @@ cudaDeviceCanAccessPeer, cudaDeviceEnablePeerAccess, cudaDeviceDisablePeerAccess
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.3](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.4](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/p2pBandwidthLatencyTest/p2pBandwidthLatencyTest_vs2017.vcxproj b/Samples/p2pBandwidthLatencyTest/p2pBandwidthLatencyTest_vs2017.vcxproj
index 28d5f5cd..036c966e 100644
--- a/Samples/p2pBandwidthLatencyTest/p2pBandwidthLatencyTest_vs2017.vcxproj
+++ b/Samples/p2pBandwidthLatencyTest/p2pBandwidthLatencyTest_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/p2pBandwidthLatencyTest/p2pBandwidthLatencyTest_vs2019.vcxproj b/Samples/p2pBandwidthLatencyTest/p2pBandwidthLatencyTest_vs2019.vcxproj
index 8a2d5450..00a6c14e 100644
--- a/Samples/p2pBandwidthLatencyTest/p2pBandwidthLatencyTest_vs2019.vcxproj
+++ b/Samples/p2pBandwidthLatencyTest/p2pBandwidthLatencyTest_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/reduction/README.md b/Samples/reduction/README.md
index 172d748a..d7be1b62 100644
--- a/Samples/reduction/README.md
+++ b/Samples/reduction/README.md
@@ -27,7 +27,7 @@ x86_64, ppc64le, armv7l
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.3](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.4](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/reduction/reduction_vs2017.vcxproj b/Samples/reduction/reduction_vs2017.vcxproj
index 7e14bc82..9c658bc1 100644
--- a/Samples/reduction/reduction_vs2017.vcxproj
+++ b/Samples/reduction/reduction_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -109,6 +109,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/reduction/reduction_vs2019.vcxproj b/Samples/reduction/reduction_vs2019.vcxproj
index 74fb1d6a..08b7216f 100644
--- a/Samples/reduction/reduction_vs2019.vcxproj
+++ b/Samples/reduction/reduction_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/shfl_scan/README.md b/Samples/shfl_scan/README.md
index 87b6872b..a9ce5ccb 100644
--- a/Samples/shfl_scan/README.md
+++ b/Samples/shfl_scan/README.md
@@ -25,7 +25,7 @@ x86_64, ppc64le, armv7l, aarch64
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.3](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.4](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/shfl_scan/shfl_scan_vs2017.vcxproj b/Samples/shfl_scan/shfl_scan_vs2017.vcxproj
index beaad3f8..d07230e5 100644
--- a/Samples/shfl_scan/shfl_scan_vs2017.vcxproj
+++ b/Samples/shfl_scan/shfl_scan_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -109,6 +109,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/shfl_scan/shfl_scan_vs2019.vcxproj b/Samples/shfl_scan/shfl_scan_vs2019.vcxproj
index 8757714e..5a4d4dda 100644
--- a/Samples/shfl_scan/shfl_scan_vs2019.vcxproj
+++ b/Samples/shfl_scan/shfl_scan_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/simpleAWBarrier/README.md b/Samples/simpleAWBarrier/README.md
index c4003183..4f13fe3d 100644
--- a/Samples/simpleAWBarrier/README.md
+++ b/Samples/simpleAWBarrier/README.md
@@ -30,7 +30,7 @@ cudaMalloc, cudaFree, cudaMemcpyAsync
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.3](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.4](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/simpleAWBarrier/simpleAWBarrier_vs2017.vcxproj b/Samples/simpleAWBarrier/simpleAWBarrier_vs2017.vcxproj
index e03ef6e2..3d2e214a 100644
--- a/Samples/simpleAWBarrier/simpleAWBarrier_vs2017.vcxproj
+++ b/Samples/simpleAWBarrier/simpleAWBarrier_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/simpleAWBarrier/simpleAWBarrier_vs2019.vcxproj b/Samples/simpleAWBarrier/simpleAWBarrier_vs2019.vcxproj
index b4be9610..d0c785cd 100644
--- a/Samples/simpleAWBarrier/simpleAWBarrier_vs2019.vcxproj
+++ b/Samples/simpleAWBarrier/simpleAWBarrier_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/simpleAttributes/README.md b/Samples/simpleAttributes/README.md
index 5d643c86..3d829879 100644
--- a/Samples/simpleAttributes/README.md
+++ b/Samples/simpleAttributes/README.md
@@ -27,7 +27,7 @@ cudaCtxResetPersistingL2Cache, cudaDeviceSetLimit, cudaFree, cudaGetDeviceProper
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.3](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.4](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/simpleAttributes/simpleAttributes_vs2017.vcxproj b/Samples/simpleAttributes/simpleAttributes_vs2017.vcxproj
index a31cf815..823668ed 100644
--- a/Samples/simpleAttributes/simpleAttributes_vs2017.vcxproj
+++ b/Samples/simpleAttributes/simpleAttributes_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/simpleAttributes/simpleAttributes_vs2019.vcxproj b/Samples/simpleAttributes/simpleAttributes_vs2019.vcxproj
index b74d221e..e66d59e7 100644
--- a/Samples/simpleAttributes/simpleAttributes_vs2019.vcxproj
+++ b/Samples/simpleAttributes/simpleAttributes_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/simpleCUBLAS/Makefile b/Samples/simpleCUBLAS/Makefile
index 516da194..bbdaed39 100644
--- a/Samples/simpleCUBLAS/Makefile
+++ b/Samples/simpleCUBLAS/Makefile
@@ -263,14 +263,6 @@ ALL_CCFLAGS += $(EXTRA_NVCCFLAGS)
 ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS))
 ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS))
 
-SAMPLE_ENABLED := 1
-
-# This sample is not supported on QNX
-ifeq ($(TARGET_OS),qnx)
-  $(info >>> WARNING - simpleCUBLAS is not supported on QNX - waiving sample <<<)
-  SAMPLE_ENABLED := 0
-endif
-
 ALL_LDFLAGS :=
 ALL_LDFLAGS += $(ALL_CCFLAGS)
 ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
@@ -305,10 +297,6 @@ ALL_CCFLAGS += --threads 0
 
 LIBRARIES += -lcublas
 
-ifeq ($(SAMPLE_ENABLED),0)
-EXEC ?= @echo "[@]"
-endif
-
 ################################################################################
 
 # Target rules
@@ -316,23 +304,16 @@ all: build
 
 build: simpleCUBLAS
 
-check.deps:
-ifeq ($(SAMPLE_ENABLED),0)
-	@echo "Sample will be waived due to the above missing dependencies"
-else
-	@echo "Sample is ready - all dependencies have been met"
-endif
-
 simpleCUBLAS.o:simpleCUBLAS.cpp
-	$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<
+	$(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<
 
 simpleCUBLAS: simpleCUBLAS.o
-	$(EXEC) $(NVCC) $(ALL_LDFLAGS) $(GENCODE_FLAGS) -o $@ $+ $(LIBRARIES)
-	$(EXEC) mkdir -p ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
-	$(EXEC) cp $@ ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
+	$(NVCC) $(ALL_LDFLAGS) $(GENCODE_FLAGS) -o $@ $+ $(LIBRARIES)
+	mkdir -p ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
+	cp $@ ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
 
 run: build
-	$(EXEC) ./simpleCUBLAS
+	./simpleCUBLAS
 
 clean:
 	rm -f simpleCUBLAS simpleCUBLAS.o
diff --git a/Samples/simpleCUBLAS/README.md b/Samples/simpleCUBLAS/README.md
index 67dd4ce5..ba3a8517 100644
--- a/Samples/simpleCUBLAS/README.md
+++ b/Samples/simpleCUBLAS/README.md
@@ -27,7 +27,7 @@ x86_64, ppc64le, armv7l, aarch64
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.3](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.4](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/simpleCUBLAS/simpleCUBLAS_vs2017.vcxproj b/Samples/simpleCUBLAS/simpleCUBLAS_vs2017.vcxproj
index 3a68d707..8098fae1 100644
--- a/Samples/simpleCUBLAS/simpleCUBLAS_vs2017.vcxproj
+++ b/Samples/simpleCUBLAS/simpleCUBLAS_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/simpleCUBLAS/simpleCUBLAS_vs2019.vcxproj b/Samples/simpleCUBLAS/simpleCUBLAS_vs2019.vcxproj
index 6370f200..7870a053 100644
--- a/Samples/simpleCUBLAS/simpleCUBLAS_vs2019.vcxproj
+++ b/Samples/simpleCUBLAS/simpleCUBLAS_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/simpleCUBLASXT/Makefile b/Samples/simpleCUBLASXT/Makefile
index b5759857..24e5af89 100644
--- a/Samples/simpleCUBLASXT/Makefile
+++ b/Samples/simpleCUBLASXT/Makefile
@@ -271,12 +271,6 @@ ifeq ($(TARGET_ARCH),armv7l)
   SAMPLE_ENABLED := 0
 endif
 
-# This sample is not supported on QNX
-ifeq ($(TARGET_OS),qnx)
-  $(info >>> WARNING - simpleCUBLASXT is not supported on QNX - waiving sample <<<)
-  SAMPLE_ENABLED := 0
-endif
-
 ALL_LDFLAGS :=
 ALL_LDFLAGS += $(ALL_CCFLAGS)
 ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
diff --git a/Samples/simpleCUBLASXT/README.md b/Samples/simpleCUBLASXT/README.md
index fd3decae..a92b933b 100644
--- a/Samples/simpleCUBLASXT/README.md
+++ b/Samples/simpleCUBLASXT/README.md
@@ -27,7 +27,7 @@ x86_64, ppc64le, aarch64
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.3](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.4](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/simpleCUBLASXT/simpleCUBLASXT_vs2017.vcxproj b/Samples/simpleCUBLASXT/simpleCUBLASXT_vs2017.vcxproj
index 3805ce26..12c606a9 100644
--- a/Samples/simpleCUBLASXT/simpleCUBLASXT_vs2017.vcxproj
+++ b/Samples/simpleCUBLASXT/simpleCUBLASXT_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/simpleCUBLASXT/simpleCUBLASXT_vs2019.vcxproj b/Samples/simpleCUBLASXT/simpleCUBLASXT_vs2019.vcxproj
index b0472a39..c527611c 100644
--- a/Samples/simpleCUBLASXT/simpleCUBLASXT_vs2019.vcxproj
+++ b/Samples/simpleCUBLASXT/simpleCUBLASXT_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/simpleCUBLAS_LU/Makefile b/Samples/simpleCUBLAS_LU/Makefile
index 2c49cc17..d6094132 100644
--- a/Samples/simpleCUBLAS_LU/Makefile
+++ b/Samples/simpleCUBLAS_LU/Makefile
@@ -277,12 +277,6 @@ ifeq ($(TARGET_ARCH),armv7l)
   SAMPLE_ENABLED := 0
 endif
 
-# This sample is not supported on QNX
-ifeq ($(TARGET_OS),qnx)
-  $(info >>> WARNING - simpleCUBLAS_LU is not supported on QNX - waiving sample <<<)
-  SAMPLE_ENABLED := 0
-endif
-
 ALL_LDFLAGS :=
 ALL_LDFLAGS += $(ALL_CCFLAGS)
 ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
diff --git a/Samples/simpleCUBLAS_LU/README.md b/Samples/simpleCUBLAS_LU/README.md
index 9ef4764b..12d0ed1e 100644
--- a/Samples/simpleCUBLAS_LU/README.md
+++ b/Samples/simpleCUBLAS_LU/README.md
@@ -27,7 +27,7 @@ x86_64, ppc64le, aarch64
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.3](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.4](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/simpleCUBLAS_LU/simpleCUBLAS_LU_vs2017.vcxproj b/Samples/simpleCUBLAS_LU/simpleCUBLAS_LU_vs2017.vcxproj
index 7599aeda..96fc7f61 100644
--- a/Samples/simpleCUBLAS_LU/simpleCUBLAS_LU_vs2017.vcxproj
+++ b/Samples/simpleCUBLAS_LU/simpleCUBLAS_LU_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/simpleCUBLAS_LU/simpleCUBLAS_LU_vs2019.vcxproj b/Samples/simpleCUBLAS_LU/simpleCUBLAS_LU_vs2019.vcxproj
index 154dac0a..1f00aec9 100644
--- a/Samples/simpleCUBLAS_LU/simpleCUBLAS_LU_vs2019.vcxproj
+++ b/Samples/simpleCUBLAS_LU/simpleCUBLAS_LU_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/simpleCUFFT/Makefile b/Samples/simpleCUFFT/Makefile
index c716cd0c..9e9475ee 100644
--- a/Samples/simpleCUFFT/Makefile
+++ b/Samples/simpleCUFFT/Makefile
@@ -265,12 +265,6 @@ ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS))
 
 SAMPLE_ENABLED := 1
 
-# This sample is not supported on QNX
-ifeq ($(TARGET_OS),qnx)
-  $(info >>> WARNING - simpleCUFFT is not supported on QNX - waiving sample <<<)
-  SAMPLE_ENABLED := 0
-endif
-
 ALL_LDFLAGS :=
 ALL_LDFLAGS += $(ALL_CCFLAGS)
 ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
diff --git a/Samples/simpleCUFFT/README.md b/Samples/simpleCUFFT/README.md
index 67227805..1efea4fd 100644
--- a/Samples/simpleCUFFT/README.md
+++ b/Samples/simpleCUFFT/README.md
@@ -27,7 +27,7 @@ x86_64, ppc64le, armv7l, aarch64
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.3](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.4](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/simpleCUFFT/simpleCUFFT_vs2017.vcxproj b/Samples/simpleCUFFT/simpleCUFFT_vs2017.vcxproj
index 914b65a7..f2b76a4b 100644
--- a/Samples/simpleCUFFT/simpleCUFFT_vs2017.vcxproj
+++ b/Samples/simpleCUFFT/simpleCUFFT_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/simpleCUFFT/simpleCUFFT_vs2019.vcxproj b/Samples/simpleCUFFT/simpleCUFFT_vs2019.vcxproj
index 339d7959..b8f8fa04 100644
--- a/Samples/simpleCUFFT/simpleCUFFT_vs2019.vcxproj
+++ b/Samples/simpleCUFFT/simpleCUFFT_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/simpleCudaGraphs/README.md b/Samples/simpleCudaGraphs/README.md
index 9e044f33..aa3567e2 100644
--- a/Samples/simpleCudaGraphs/README.md
+++ b/Samples/simpleCudaGraphs/README.md
@@ -25,7 +25,7 @@ cudaStreamBeginCapture, cudaStreamEndCapture, cudaLaunchHostFunc, cudaGraphCreat
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.3](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.4](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/simpleCudaGraphs/simpleCudaGraphs_vs2017.vcxproj b/Samples/simpleCudaGraphs/simpleCudaGraphs_vs2017.vcxproj
index 5a48206e..7b8fa0ca 100644
--- a/Samples/simpleCudaGraphs/simpleCudaGraphs_vs2017.vcxproj
+++ b/Samples/simpleCudaGraphs/simpleCudaGraphs_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/simpleCudaGraphs/simpleCudaGraphs_vs2019.vcxproj b/Samples/simpleCudaGraphs/simpleCudaGraphs_vs2019.vcxproj
index e7aeecd2..348817a8 100644
--- a/Samples/simpleCudaGraphs/simpleCudaGraphs_vs2019.vcxproj
+++ b/Samples/simpleCudaGraphs/simpleCudaGraphs_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/simpleD3D11/README.md b/Samples/simpleD3D11/README.md
index eb8d6428..bd59b087 100644
--- a/Samples/simpleD3D11/README.md
+++ b/Samples/simpleD3D11/README.md
@@ -30,7 +30,7 @@ cudaD3D11GetDevice, cudaImportExternalSemaphore, cudaImportExternalMemory, cudaE
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.3](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.4](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/simpleD3D11/simpleD3D11_vs2017.vcxproj b/Samples/simpleD3D11/simpleD3D11_vs2017.vcxproj
index 651c1a4c..882d1481 100644
--- a/Samples/simpleD3D11/simpleD3D11_vs2017.vcxproj
+++ b/Samples/simpleD3D11/simpleD3D11_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -112,6 +112,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/simpleD3D11/simpleD3D11_vs2019.vcxproj b/Samples/simpleD3D11/simpleD3D11_vs2019.vcxproj
index c3bcdbe4..95f974e1 100644
--- a/Samples/simpleD3D11/simpleD3D11_vs2019.vcxproj
+++ b/Samples/simpleD3D11/simpleD3D11_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/simpleD3D12/README.md b/Samples/simpleD3D12/README.md
index 4c60ed5b..065c9360 100644
--- a/Samples/simpleD3D12/README.md
+++ b/Samples/simpleD3D12/README.md
@@ -30,7 +30,7 @@ cudaWaitExternalSemaphoresAsync, cudaSignalExternalSemaphoresAsync, cudaImportEx
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.3](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.4](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/simpleD3D12/simpleD3D12_vs2017.vcxproj b/Samples/simpleD3D12/simpleD3D12_vs2017.vcxproj
index e0bee149..3804d197 100644
--- a/Samples/simpleD3D12/simpleD3D12_vs2017.vcxproj
+++ b/Samples/simpleD3D12/simpleD3D12_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -120,6 +120,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/simpleD3D12/simpleD3D12_vs2019.vcxproj b/Samples/simpleD3D12/simpleD3D12_vs2019.vcxproj
index ec724d05..e97672dc 100644
--- a/Samples/simpleD3D12/simpleD3D12_vs2019.vcxproj
+++ b/Samples/simpleD3D12/simpleD3D12_vs2019.vcxproj
@@ -39,7 +39,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -121,6 +121,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/simpleDrvRuntime/README.md b/Samples/simpleDrvRuntime/README.md
index 8c09f93b..9ea98174 100644
--- a/Samples/simpleDrvRuntime/README.md
+++ b/Samples/simpleDrvRuntime/README.md
@@ -30,7 +30,7 @@ cudaMemcpy, cudaMalloc, cudaStreamCreateWithFlags
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.3](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.4](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/simpleDrvRuntime/simpleDrvRuntime_vs2017.vcxproj b/Samples/simpleDrvRuntime/simpleDrvRuntime_vs2017.vcxproj
index 94e29419..9e1d0a4a 100644
--- a/Samples/simpleDrvRuntime/simpleDrvRuntime_vs2017.vcxproj
+++ b/Samples/simpleDrvRuntime/simpleDrvRuntime_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -112,6 +112,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/simpleDrvRuntime/simpleDrvRuntime_vs2019.vcxproj b/Samples/simpleDrvRuntime/simpleDrvRuntime_vs2019.vcxproj
index 42556e0c..554ac904 100644
--- a/Samples/simpleDrvRuntime/simpleDrvRuntime_vs2019.vcxproj
+++ b/Samples/simpleDrvRuntime/simpleDrvRuntime_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/simpleGL/README.md b/Samples/simpleGL/README.md
index 5176ee1b..008c0be6 100644
--- a/Samples/simpleGL/README.md
+++ b/Samples/simpleGL/README.md
@@ -30,7 +30,7 @@ cudaGraphicsMapResources, cudaGraphicsUnmapResources, cudaGraphicsResourceGetMap
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.3](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.4](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/simpleGL/simpleGL_vs2017.vcxproj b/Samples/simpleGL/simpleGL_vs2017.vcxproj
index d096e815..92e84cc4 100644
--- a/Samples/simpleGL/simpleGL_vs2017.vcxproj
+++ b/Samples/simpleGL/simpleGL_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -118,6 +118,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/simpleGL/simpleGL_vs2019.vcxproj b/Samples/simpleGL/simpleGL_vs2019.vcxproj
index 527b22d0..60eab5e8 100644
--- a/Samples/simpleGL/simpleGL_vs2019.vcxproj
+++ b/Samples/simpleGL/simpleGL_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -114,6 +114,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/simpleIPC/README.md b/Samples/simpleIPC/README.md
index 3fcb740a..cf35bf0d 100644
--- a/Samples/simpleIPC/README.md
+++ b/Samples/simpleIPC/README.md
@@ -30,7 +30,7 @@ cudaIpcGetEventHandle, cudaIpcOpenMemHandle, cudaIpcCloseMemHandle, cudaMemcpyAs
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.3](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.4](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/simpleIPC/simpleIPC_vs2017.vcxproj b/Samples/simpleIPC/simpleIPC_vs2017.vcxproj
index 640802b1..e3345f52 100644
--- a/Samples/simpleIPC/simpleIPC_vs2017.vcxproj
+++ b/Samples/simpleIPC/simpleIPC_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -109,6 +109,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/simpleIPC/simpleIPC_vs2019.vcxproj b/Samples/simpleIPC/simpleIPC_vs2019.vcxproj
index 8c03b709..b176762f 100644
--- a/Samples/simpleIPC/simpleIPC_vs2019.vcxproj
+++ b/Samples/simpleIPC/simpleIPC_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -105,6 +105,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/simpleVoteIntrinsics/README.md b/Samples/simpleVoteIntrinsics/README.md
index 314de841..162a13fe 100644
--- a/Samples/simpleVoteIntrinsics/README.md
+++ b/Samples/simpleVoteIntrinsics/README.md
@@ -27,7 +27,7 @@ cudaMalloc, cudaFree, cudaMemcpy, cudaFreeHost
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.3](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.4](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/simpleVoteIntrinsics/simpleVoteIntrinsics_vs2017.vcxproj b/Samples/simpleVoteIntrinsics/simpleVoteIntrinsics_vs2017.vcxproj
index 62b48298..908d81ac 100644
--- a/Samples/simpleVoteIntrinsics/simpleVoteIntrinsics_vs2017.vcxproj
+++ b/Samples/simpleVoteIntrinsics/simpleVoteIntrinsics_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/simpleVoteIntrinsics/simpleVoteIntrinsics_vs2019.vcxproj b/Samples/simpleVoteIntrinsics/simpleVoteIntrinsics_vs2019.vcxproj
index 630c35e5..a8bfac7b 100644
--- a/Samples/simpleVoteIntrinsics/simpleVoteIntrinsics_vs2019.vcxproj
+++ b/Samples/simpleVoteIntrinsics/simpleVoteIntrinsics_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/simpleVulkan/README.md b/Samples/simpleVulkan/README.md
index 4cbb0122..91a4f186 100644
--- a/Samples/simpleVulkan/README.md
+++ b/Samples/simpleVulkan/README.md
@@ -30,7 +30,7 @@ cudaImportExternalMemory, cudaExternalMemoryGetMappedBuffer, cudaImportExternalS
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.3](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.4](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/simpleVulkan/simpleVulkan_vs2017.vcxproj b/Samples/simpleVulkan/simpleVulkan_vs2017.vcxproj
index 713ae122..069fdc1e 100644
--- a/Samples/simpleVulkan/simpleVulkan_vs2017.vcxproj
+++ b/Samples/simpleVulkan/simpleVulkan_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -122,6 +122,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/simpleVulkan/simpleVulkan_vs2019.vcxproj b/Samples/simpleVulkan/simpleVulkan_vs2019.vcxproj
index a03ea3de..88d61caf 100644
--- a/Samples/simpleVulkan/simpleVulkan_vs2019.vcxproj
+++ b/Samples/simpleVulkan/simpleVulkan_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -118,6 +118,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/simpleVulkanMMAP/README.md b/Samples/simpleVulkanMMAP/README.md
index bd3aeb63..52f4b74f 100644
--- a/Samples/simpleVulkanMMAP/README.md
+++ b/Samples/simpleVulkanMMAP/README.md
@@ -33,7 +33,7 @@ cudaGetDeviceProperties, cudaImportExternalMemory, cudaExternalMemoryGetMappedBu
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.3](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.4](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/simpleVulkanMMAP/simpleVulkanMMAP_vs2017.vcxproj b/Samples/simpleVulkanMMAP/simpleVulkanMMAP_vs2017.vcxproj
index 95ec4011..ca7b8dec 100644
--- a/Samples/simpleVulkanMMAP/simpleVulkanMMAP_vs2017.vcxproj
+++ b/Samples/simpleVulkanMMAP/simpleVulkanMMAP_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -124,6 +124,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/simpleVulkanMMAP/simpleVulkanMMAP_vs2019.vcxproj b/Samples/simpleVulkanMMAP/simpleVulkanMMAP_vs2019.vcxproj
index c15cb955..2537bca1 100644
--- a/Samples/simpleVulkanMMAP/simpleVulkanMMAP_vs2019.vcxproj
+++ b/Samples/simpleVulkanMMAP/simpleVulkanMMAP_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -120,6 +120,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/simpleZeroCopy/README.md b/Samples/simpleZeroCopy/README.md
index 12919ca0..8172982d 100644
--- a/Samples/simpleZeroCopy/README.md
+++ b/Samples/simpleZeroCopy/README.md
@@ -27,7 +27,7 @@ cudaEventCreate, cudaEventRecord, cudaEventQuery, cudaEventDestroy, cudaEventEla
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.3](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.4](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/simpleZeroCopy/simpleZeroCopy_vs2017.vcxproj b/Samples/simpleZeroCopy/simpleZeroCopy_vs2017.vcxproj
index 72ad3aaa..8c74cd84 100644
--- a/Samples/simpleZeroCopy/simpleZeroCopy_vs2017.vcxproj
+++ b/Samples/simpleZeroCopy/simpleZeroCopy_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/simpleZeroCopy/simpleZeroCopy_vs2019.vcxproj b/Samples/simpleZeroCopy/simpleZeroCopy_vs2019.vcxproj
index 40b06783..f96996d8 100644
--- a/Samples/simpleZeroCopy/simpleZeroCopy_vs2019.vcxproj
+++ b/Samples/simpleZeroCopy/simpleZeroCopy_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/streamOrderedAllocation/README.md b/Samples/streamOrderedAllocation/README.md
index be8d5602..c2cafafe 100644
--- a/Samples/streamOrderedAllocation/README.md
+++ b/Samples/streamOrderedAllocation/README.md
@@ -27,7 +27,7 @@ cudaMallocAsync, cudaFreeAsync, cudaMemPoolSetAttribute, cudaDeviceGetDefaultMem
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.3](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.4](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/streamOrderedAllocation/streamOrderedAllocation_vs2017.vcxproj b/Samples/streamOrderedAllocation/streamOrderedAllocation_vs2017.vcxproj
index 1113cafe..33b24ecd 100644
--- a/Samples/streamOrderedAllocation/streamOrderedAllocation_vs2017.vcxproj
+++ b/Samples/streamOrderedAllocation/streamOrderedAllocation_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/streamOrderedAllocation/streamOrderedAllocation_vs2019.vcxproj b/Samples/streamOrderedAllocation/streamOrderedAllocation_vs2019.vcxproj
index 5e884ceb..046057bc 100644
--- a/Samples/streamOrderedAllocation/streamOrderedAllocation_vs2019.vcxproj
+++ b/Samples/streamOrderedAllocation/streamOrderedAllocation_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/streamOrderedAllocationIPC/README.md b/Samples/streamOrderedAllocationIPC/README.md
index 04948fae..3ac4a03c 100644
--- a/Samples/streamOrderedAllocationIPC/README.md
+++ b/Samples/streamOrderedAllocationIPC/README.md
@@ -27,7 +27,7 @@ cudaMallocAsync, cudaFreeAsync, cudaMemPoolCreate, cudaMemPoolImportPointer, cud
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.3](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.4](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/streamOrderedAllocationP2P/README.md b/Samples/streamOrderedAllocationP2P/README.md
index 164284a9..4b03cbdc 100644
--- a/Samples/streamOrderedAllocationP2P/README.md
+++ b/Samples/streamOrderedAllocationP2P/README.md
@@ -27,7 +27,7 @@ cudaMallocAsync, cudaFreeAsync, cudaMemPoolSetAccess, cudaDeviceGetDefaultMemPoo
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.3](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.4](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/streamOrderedAllocationP2P/streamOrderedAllocationP2P_vs2017.vcxproj b/Samples/streamOrderedAllocationP2P/streamOrderedAllocationP2P_vs2017.vcxproj
index 116d3c9c..4d56b360 100644
--- a/Samples/streamOrderedAllocationP2P/streamOrderedAllocationP2P_vs2017.vcxproj
+++ b/Samples/streamOrderedAllocationP2P/streamOrderedAllocationP2P_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/streamOrderedAllocationP2P/streamOrderedAllocationP2P_vs2019.vcxproj b/Samples/streamOrderedAllocationP2P/streamOrderedAllocationP2P_vs2019.vcxproj
index 50529ea0..b6b72aed 100644
--- a/Samples/streamOrderedAllocationP2P/streamOrderedAllocationP2P_vs2019.vcxproj
+++ b/Samples/streamOrderedAllocationP2P/streamOrderedAllocationP2P_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/systemWideAtomics/README.md b/Samples/systemWideAtomics/README.md
index 530df9d3..115d8ac7 100644
--- a/Samples/systemWideAtomics/README.md
+++ b/Samples/systemWideAtomics/README.md
@@ -30,7 +30,7 @@ cudaMalloc, cudaFree, cudaMemcpy, cudaFreeHost
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.3](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.4](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/tf32TensorCoreGemm/README.md b/Samples/tf32TensorCoreGemm/README.md
index c1513be1..72577bdf 100644
--- a/Samples/tf32TensorCoreGemm/README.md
+++ b/Samples/tf32TensorCoreGemm/README.md
@@ -27,7 +27,7 @@ cudaMalloc, cudaDeviceSynchronize, cudaFuncSetAttribute, cudaEventCreate, cudaEv
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.3](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.4](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/tf32TensorCoreGemm/tf32TensorCoreGemm_vs2017.vcxproj b/Samples/tf32TensorCoreGemm/tf32TensorCoreGemm_vs2017.vcxproj
index 4cd44a20..305b25b5 100644
--- a/Samples/tf32TensorCoreGemm/tf32TensorCoreGemm_vs2017.vcxproj
+++ b/Samples/tf32TensorCoreGemm/tf32TensorCoreGemm_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/tf32TensorCoreGemm/tf32TensorCoreGemm_vs2019.vcxproj b/Samples/tf32TensorCoreGemm/tf32TensorCoreGemm_vs2019.vcxproj
index 5ed41711..731b304a 100644
--- a/Samples/tf32TensorCoreGemm/tf32TensorCoreGemm_vs2019.vcxproj
+++ b/Samples/tf32TensorCoreGemm/tf32TensorCoreGemm_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/vectorAddMMAP/README.md b/Samples/vectorAddMMAP/README.md
index c385d627..24783aef 100644
--- a/Samples/vectorAddMMAP/README.md
+++ b/Samples/vectorAddMMAP/README.md
@@ -27,7 +27,7 @@ cuModuleLoad, cuModuleLoadDataEx, cuModuleGetFunction, cuMemAlloc, cuMemFree, cu
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.3](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.4](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/vectorAddMMAP/vectorAddMMAP_vs2017.vcxproj b/Samples/vectorAddMMAP/vectorAddMMAP_vs2017.vcxproj
index 39f9ba5b..2e987db6 100644
--- a/Samples/vectorAddMMAP/vectorAddMMAP_vs2017.vcxproj
+++ b/Samples/vectorAddMMAP/vectorAddMMAP_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -113,6 +113,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/vectorAddMMAP/vectorAddMMAP_vs2019.vcxproj b/Samples/vectorAddMMAP/vectorAddMMAP_vs2019.vcxproj
index ccc98fe0..22b8a10d 100644
--- a/Samples/vectorAddMMAP/vectorAddMMAP_vs2019.vcxproj
+++ b/Samples/vectorAddMMAP/vectorAddMMAP_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -109,6 +109,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/vectorAdd_nvrtc/README.md b/Samples/vectorAdd_nvrtc/README.md
index 64132b80..8520e04b 100644
--- a/Samples/vectorAdd_nvrtc/README.md
+++ b/Samples/vectorAdd_nvrtc/README.md
@@ -30,7 +30,7 @@ cuMemAlloc, cuMemFree, cuMemcpyHtoD, cuMemcpyDtoH
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.3](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.4](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/vectorAdd_nvrtc/vectorAdd_nvrtc_vs2017.vcxproj b/Samples/vectorAdd_nvrtc/vectorAdd_nvrtc_vs2017.vcxproj
index 3fecdd25..9cb2e2c1 100644
--- a/Samples/vectorAdd_nvrtc/vectorAdd_nvrtc_vs2017.vcxproj
+++ b/Samples/vectorAdd_nvrtc/vectorAdd_nvrtc_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/vectorAdd_nvrtc/vectorAdd_nvrtc_vs2019.vcxproj b/Samples/vectorAdd_nvrtc/vectorAdd_nvrtc_vs2019.vcxproj
index 697e47aa..72d19488 100644
--- a/Samples/vectorAdd_nvrtc/vectorAdd_nvrtc_vs2019.vcxproj
+++ b/Samples/vectorAdd_nvrtc/vectorAdd_nvrtc_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/vulkanImageCUDA/README.md b/Samples/vulkanImageCUDA/README.md
index f37e25f3..aae79199 100644
--- a/Samples/vulkanImageCUDA/README.md
+++ b/Samples/vulkanImageCUDA/README.md
@@ -30,7 +30,7 @@ cudaImportExternalMemory, cudaExternalMemoryGetMappedMipmappedArray, cudaImportE
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.3](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.4](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/vulkanImageCUDA/vulkanImageCUDA_vs2017.vcxproj b/Samples/vulkanImageCUDA/vulkanImageCUDA_vs2017.vcxproj
index f14a0515..2bfc68f7 100644
--- a/Samples/vulkanImageCUDA/vulkanImageCUDA_vs2017.vcxproj
+++ b/Samples/vulkanImageCUDA/vulkanImageCUDA_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -118,6 +118,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/vulkanImageCUDA/vulkanImageCUDA_vs2019.vcxproj b/Samples/vulkanImageCUDA/vulkanImageCUDA_vs2019.vcxproj
index 42673869..655c8105 100644
--- a/Samples/vulkanImageCUDA/vulkanImageCUDA_vs2019.vcxproj
+++ b/Samples/vulkanImageCUDA/vulkanImageCUDA_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -114,6 +114,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/warpAggregatedAtomicsCG/README.md b/Samples/warpAggregatedAtomicsCG/README.md
index c4c351a7..3344aa18 100644
--- a/Samples/warpAggregatedAtomicsCG/README.md
+++ b/Samples/warpAggregatedAtomicsCG/README.md
@@ -22,7 +22,7 @@ x86_64, ppc64le, armv7l, aarch64
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.3](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.4](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 
 ## Build and Run
 
diff --git a/Samples/warpAggregatedAtomicsCG/warpAggregatedAtomicsCG_vs2017.vcxproj b/Samples/warpAggregatedAtomicsCG/warpAggregatedAtomicsCG_vs2017.vcxproj
index 6a6c8655..018e27e0 100644
--- a/Samples/warpAggregatedAtomicsCG/warpAggregatedAtomicsCG_vs2017.vcxproj
+++ b/Samples/warpAggregatedAtomicsCG/warpAggregatedAtomicsCG_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/warpAggregatedAtomicsCG/warpAggregatedAtomicsCG_vs2019.vcxproj b/Samples/warpAggregatedAtomicsCG/warpAggregatedAtomicsCG_vs2019.vcxproj
index 6e83354a..f22e4976 100644
--- a/Samples/warpAggregatedAtomicsCG/warpAggregatedAtomicsCG_vs2019.vcxproj
+++ b/Samples/warpAggregatedAtomicsCG/warpAggregatedAtomicsCG_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/watershedSegmentationNPP/Makefile b/Samples/watershedSegmentationNPP/Makefile
index c03a879d..a65719dc 100644
--- a/Samples/watershedSegmentationNPP/Makefile
+++ b/Samples/watershedSegmentationNPP/Makefile
@@ -271,12 +271,6 @@ ifeq ($(TARGET_OS),darwin)
   SAMPLE_ENABLED := 0
 endif
 
-# This sample is not supported on QNX
-ifeq ($(TARGET_OS),qnx)
-  $(info >>> WARNING - watershedSegmentationNPP is not supported on QNX - waiving sample <<<)
-  SAMPLE_ENABLED := 0
-endif
-
 ALL_LDFLAGS :=
 ALL_LDFLAGS += $(ALL_CCFLAGS)
 ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
diff --git a/Samples/watershedSegmentationNPP/README.md b/Samples/watershedSegmentationNPP/README.md
index 0b320280..96169587 100644
--- a/Samples/watershedSegmentationNPP/README.md
+++ b/Samples/watershedSegmentationNPP/README.md
@@ -27,7 +27,7 @@ x86_64, ppc64le, armv7l
 
 ## Prerequisites
 
-Download and install the [CUDA Toolkit 11.3](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Download and install the [CUDA Toolkit 11.4](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 Make sure the dependencies mentioned in [Dependencies]() section above are installed.
 
 ## Build and Run
diff --git a/Samples/watershedSegmentationNPP/watershedSegmentationNPP_vs2017.vcxproj b/Samples/watershedSegmentationNPP/watershedSegmentationNPP_vs2017.vcxproj
index 548b5361..9fa9dbb1 100644
--- a/Samples/watershedSegmentationNPP/watershedSegmentationNPP_vs2017.vcxproj
+++ b/Samples/watershedSegmentationNPP/watershedSegmentationNPP_vs2017.vcxproj
@@ -38,7 +38,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -108,6 +108,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>
diff --git a/Samples/watershedSegmentationNPP/watershedSegmentationNPP_vs2019.vcxproj b/Samples/watershedSegmentationNPP/watershedSegmentationNPP_vs2019.vcxproj
index ee297d72..97d89664 100644
--- a/Samples/watershedSegmentationNPP/watershedSegmentationNPP_vs2019.vcxproj
+++ b/Samples/watershedSegmentationNPP/watershedSegmentationNPP_vs2019.vcxproj
@@ -34,7 +34,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.props" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.props" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets">
     <Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@@ -104,6 +104,6 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
-    <Import Project="$(CUDAPropsPath)\CUDA 11.3.targets" />
+    <Import Project="$(CUDAPropsPath)\CUDA 11.4.targets" />
   </ImportGroup>
 </Project>